From f48473e42511f8d37a239a07f791bc0a87209e5b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 08:59:47 -0600 Subject: r200/r300/r500: add _NEW_PROGRAM_CONSTANTS flag Make sure we detect constant buffer changes indicated by the new flag. Should be able to remove _NEW_PROGRAM (and _NEW_MODELVIEW, _NEW_LIGHT, etc) from several places (someday. --- src/mesa/drivers/dri/r200/r200_state.c | 3 ++- src/mesa/drivers/dri/r300/r300_fragprog.c | 3 ++- src/mesa/drivers/dri/r300/r300_state.c | 7 ++++--- src/mesa/drivers/dri/r300/r500_fragprog.c | 3 ++- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index 2fcc87c0f5a..28ba5f49bc0 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -2484,7 +2484,7 @@ void r200ValidateState( GLcontext *ctx ) r200UpdateDrawBuffer(ctx); } - if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) { + if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) { r200UpdateTextureState( ctx ); new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */ r200UpdateLocalViewer( ctx ); @@ -2523,6 +2523,7 @@ void r200ValidateState( GLcontext *ctx ) } if (new_state & (_NEW_PROGRAM| + _NEW_PROGRAM_CONSTANTS | /* need to test for pretty much anything due to possible parameter bindings */ _NEW_MODELVIEW|_NEW_PROJECTION|_NEW_TRANSFORM| _NEW_LIGHT|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX| diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 873cde44144..2f45429cf27 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -470,7 +470,8 @@ void r300TranslateFragmentShader(r300ContextPtr r300, fp->translated = GL_TRUE; if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) r300FragmentProgramDump(fp, &fp->code); - r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); + r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM | + _NEW_PROGRAM_CONSTANTS); } update_params(r300, fp); diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 8095538ff94..2589f09cc88 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1107,7 +1107,7 @@ void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) struct gl_program_parameter_list *paramList; GLuint i; - if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM))) + if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))) return; fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current; @@ -2355,11 +2355,12 @@ void r300UpdateShaders(r300ContextPtr rmesa) hw_tcl_on = future_hw_tcl_on = 0; r300ResetHwState(rmesa); - r300UpdateStateParameters(ctx, _NEW_PROGRAM); + r300UpdateStateParameters(ctx, _NEW_PROGRAM | + _NEW_PROGRAM_CONSTANTS); return; } } - r300UpdateStateParameters(ctx, _NEW_PROGRAM); + r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); } static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 292573de893..300559d0b4d 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -501,7 +501,8 @@ void r500TranslateFragmentShader(r300ContextPtr r300, _mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0); - r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); + r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM | + _NEW_PROGRAM_CONSTANTS); if (RADEON_DEBUG & DEBUG_PIXEL) { if (fp->translated) { -- cgit v1.2.3 From 817dcdd280cd749c3186bd3f00c06f41270aa884 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 09:07:02 -0600 Subject: i965: use new _NEW_PROGRAM_CONSTANTS flag instead of dynamic flags --- src/mesa/drivers/dri/i965/brw_curbe.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 18b187ed1dc..3c81899672e 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -188,13 +188,6 @@ static void prepare_constant_buffer(struct brw_context *brw) GLfloat *buf; GLuint i; - /* Update our own dependency flags. This works because this - * function will also be called whenever fp or vp changes. - */ - brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION); - brw->curbe.tracked_state.dirty.mesa |= vp->program.Base.Parameters->StateFlags; - brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags; - if (sz == 0) { if (brw->curbe.last_buf) { free(brw->curbe.last_buf); @@ -422,7 +415,7 @@ static void emit_constant_buffer(struct brw_context *brw) */ const struct brw_tracked_state brw_constant_buffer = { .dirty = { - .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */ + .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_VERTEX_PROGRAM | BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ -- cgit v1.2.3 From 6b6a23c0f7e042d71764a2028f3d33b59076ac7c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 09:07:46 -0600 Subject: i965: updates to some debug code --- src/mesa/drivers/dri/i965/brw_curbe.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 3c81899672e..da746e4aa05 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -352,11 +352,7 @@ update_constant_buffer(struct brw_context *brw, dri_bo_unmap(const_buffer); if (0) { - int i; - for (i = 0; i < params->NumParameters; i++) { - float *p = params->ParameterValues[i]; - printf("%d: %f %f %f %f\n", i, p[0], p[1], p[2], p[3]); - } + _mesa_print_parameter_list(params); } } } @@ -369,7 +365,7 @@ update_vertex_constant_buffer(struct brw_context *brw) struct brw_vertex_program *vp = (struct brw_vertex_program *) brw->vertex_program; if (0) { - printf("update VS constants in buffer %p\n", vp->const_buffer); + printf("update VS constants in buffer %p vp = %p\n", vp->const_buffer, vp); printf("program %u\n", vp->program.Base.Id); } update_constant_buffer(brw, vp->program.Base.Parameters, vp->const_buffer); @@ -382,6 +378,10 @@ update_fragment_constant_buffer(struct brw_context *brw) { struct brw_fragment_program *fp = (struct brw_fragment_program *) brw->fragment_program; + if (0) { + printf("update WM constants in buffer %p\n", fp->const_buffer); + printf("program %u\n", fp->program.Base.Id); + } update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer); } -- cgit v1.2.3 From 1dbab84e21cad81e971265db3dbc8dc6c344b340 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 09:14:16 -0600 Subject: i965: use _NEW_PROGRAM_CONSTANTS and always create new const buffers When program constants change we create a new VS constant buffer instead of re-using the old one. This allows us to have several const buffers in flight with vertex rendering. --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 28 ++++++++++++------------ 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 71840d1e4e8..89c456e62cc 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -456,17 +456,14 @@ brw_update_vs_constant_surface( GLcontext *ctx, assert(surf == 0); - /* free old const buffer if too small */ - if (const_buffer && const_buffer->size < size) { - dri_bo_unreference(const_buffer); - const_buffer = NULL; - } + /* We always create a new VS constant buffer so that several can be + * in flight at a time. Free the old one first... + */ + dri_bo_unreference(const_buffer); - /* alloc new buffer if needed */ - if (!const_buffer) { - const_buffer = - drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64); - } + /* alloc new buffer */ + const_buffer = + drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64); memset(&key, 0, sizeof(key)); @@ -783,8 +780,7 @@ brw_vs_get_binding_table(struct brw_context *brw) /** - * Vertex shader surfaces. Just constant buffer for now. Could add vertex - * shader textures in the future. + * Vertex shader surfaces (constant buffer). */ static void prepare_vs_surfaces(struct brw_context *brw ) { @@ -820,8 +816,12 @@ prepare_surfaces(struct brw_context *brw) const struct brw_tracked_state brw_wm_surfaces = { .dirty = { - .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM, - .brw = BRW_NEW_CONTEXT, + .mesa = (_NEW_COLOR | + _NEW_TEXTURE | + _NEW_BUFFERS | + _NEW_PROGRAM | + _NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_CONTEXT), .cache = 0 }, .prepare = prepare_surfaces, -- cgit v1.2.3 From a36dd5d54e3de5662c694e764d1c49795ddb6814 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 09:14:53 -0600 Subject: i915: check the new _NEW_PROGRAM_CONSTANT flag --- src/mesa/drivers/dri/i915/i915_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index fdd2cf61096..45ba2d14a5d 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -73,7 +73,7 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state) p->params_uptodate = 0; } - if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM)) + if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) i915_update_fog(ctx); } -- cgit v1.2.3 From e5681fc176bc43bc6c7804bd1e8d8557cdcab345 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 09:16:21 -0600 Subject: i965: add _NEW_PROGRAM_CONSTANTS to mesa_bits[] list --- src/mesa/drivers/dri/i965/brw_state_upload.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 5de1450e612..197efeb1b73 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -218,6 +218,7 @@ static struct dirty_bit_map mesa_bits[] = { DEFINE_BIT(_NEW_MULTISAMPLE), DEFINE_BIT(_NEW_TRACK_MATRIX), DEFINE_BIT(_NEW_PROGRAM), + DEFINE_BIT(_NEW_PROGRAM_CONSTANTS), {0, 0, 0} }; -- cgit v1.2.3 From f428255bde93a452a7cdd48fba21839c99beb6cb Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 09:23:15 -0600 Subject: i965: the brw_constant_buffer state atom is no longer dynamic No more dynamic atoms so we can simplify the state validation code a little. --- src/mesa/drivers/dri/i965/brw_context.h | 7 ------- src/mesa/drivers/dri/i965/brw_state_upload.c | 31 +++++----------------------- 2 files changed, 5 insertions(+), 33 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index a0b3b06309f..af9fef5e22b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -448,8 +448,6 @@ struct brw_context struct { struct brw_state_flags dirty; - struct brw_tracked_state **atoms; - GLuint nr_atoms; GLuint nr_color_regions; struct intel_region *color_regions[MAX_DRAW_BUFFERS]; @@ -553,11 +551,6 @@ struct brw_context GLuint vs_size; GLuint total_size; - /* Dynamic tracker which changes to reflect the state referenced - * by active fp and vp program parameters: - */ - struct brw_tracked_state tracked_state; - dri_bo *curbe_bo; /** Offset within curbe_bo of space for current curbe entry */ GLuint curbe_offset; diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 197efeb1b73..491e2e2452b 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -102,39 +102,18 @@ const struct brw_tracked_state *atoms[] = &brw_indices, &brw_vertices, - NULL, /* brw_constant_buffer */ + &brw_constant_buffer }; void brw_init_state( struct brw_context *brw ) { - GLuint i; - brw_init_cache(brw); - - brw->state.atoms = _mesa_malloc(sizeof(atoms)); - brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms); - _mesa_memcpy(brw->state.atoms, atoms, sizeof(atoms)); - - /* Patch in a pointer to the dynamic state atom: - */ - for (i = 0; i < brw->state.nr_atoms; i++) - if (brw->state.atoms[i] == NULL) - brw->state.atoms[i] = &brw->curbe.tracked_state; - - _mesa_memcpy(&brw->curbe.tracked_state, - &brw_constant_buffer, - sizeof(brw_constant_buffer)); } void brw_destroy_state( struct brw_context *brw ) { - if (brw->state.atoms) { - _mesa_free(brw->state.atoms); - brw->state.atoms = NULL; - } - brw_destroy_cache(brw); brw_destroy_batch_cache(brw); } @@ -337,7 +316,7 @@ void brw_validate_state( struct brw_context *brw ) /* do prepare stage for all atoms */ for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) break; @@ -368,8 +347,8 @@ void brw_upload_state(struct brw_context *brw) _mesa_memset(&examined, 0, sizeof(examined)); prev = *state; - for (i = 0; i < brw->state.nr_atoms; i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = atoms[i]; struct brw_state_flags generated; assert(atom->dirty.mesa || @@ -398,7 +377,7 @@ void brw_upload_state(struct brw_context *brw) } else { for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) break; -- cgit v1.2.3 From a071a8d2e72e52e6a8906448b171756c8920ce96 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 10:44:36 -0600 Subject: i965: remove unused state atom entries --- src/mesa/drivers/dri/i965/brw_state_upload.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 491e2e2452b..20892cdf322 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -59,7 +59,6 @@ const struct brw_tracked_state *atoms[] = &brw_curbe_offsets, &brw_recalculate_urb_fence, - &brw_cc_vp, &brw_cc_unit, @@ -88,15 +87,8 @@ const struct brw_tracked_state *atoms[] = &brw_line_stipple, &brw_aa_line_parameters, - /* Ordering of the commands below is documented as fixed. - */ -#if 0 - &brw_pipelined_state_pointers, - &brw_urb_fence, - &brw_constant_buffer_state, -#else + &brw_psp_urb_cbs, -#endif &brw_drawing_rect, &brw_indices, -- cgit v1.2.3 From f9af97c7a5d81226a87d79baf8fb00231c96398d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 11:08:46 -0600 Subject: i965: checkpoint commit: use two state caches instead of one The new, second cache will only be used for surface-related items. Since we can create many surfaces the original, single cache could get filled quickly. When we cleared it, we had to regenerate shaders, etc. With two caches, we can avoid doing that. --- src/mesa/drivers/dri/i965/brw_context.h | 3 +- src/mesa/drivers/dri/i965/brw_state.h | 4 +- src/mesa/drivers/dri/i965/brw_state_cache.c | 122 ++++++++++++++++++--------- src/mesa/drivers/dri/i965/brw_state_upload.c | 4 +- 4 files changed, 88 insertions(+), 45 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index af9fef5e22b..cad711d18ab 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -467,7 +467,8 @@ struct brw_context int validated_bo_count; } state; - struct brw_cache cache; + struct brw_cache cache; /** non-surface items */ + struct brw_cache surface_cache; /* surface items */ struct brw_cached_batch_item *cached_batch_items; struct { diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 81b0a45998f..7ea2fc113ca 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -135,8 +135,8 @@ dri_bo *brw_search_cache( struct brw_cache *cache, void *aux_return); void brw_state_cache_check_size( struct brw_context *brw ); -void brw_init_cache( struct brw_context *brw ); -void brw_destroy_cache( struct brw_context *brw ); +void brw_init_caches( struct brw_context *brw ); +void brw_destroy_caches( struct brw_context *brw ); /*********************************************************************** * brw_state_batch.c diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index d5b51664066..3b23a8b755a 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -320,20 +320,20 @@ enum pool_type { }; static void -brw_init_cache_id( struct brw_context *brw, - const char *name, - enum brw_cache_id id, - GLuint key_size, - GLuint aux_size) +brw_init_cache_id(struct brw_cache *cache, + const char *name, + enum brw_cache_id id, + GLuint key_size, + GLuint aux_size) { - struct brw_cache *cache = &brw->cache; - cache->name[id] = strdup(name); cache->key_size[id] = key_size; cache->aux_size[id] = aux_size; } -void brw_init_cache( struct brw_context *brw ) + +static void +brw_init_non_surface_cache( struct brw_context *brw ) { struct brw_cache *cache = &brw->cache; @@ -342,114 +342,145 @@ void brw_init_cache( struct brw_context *brw ) cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * - sizeof(struct brw_cache_item)); + _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CC_VP", BRW_CC_VP, sizeof(struct brw_cc_viewport), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT, sizeof(struct brw_cc_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG, sizeof(struct brw_wm_prog_key), sizeof(struct brw_wm_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR, sizeof(struct brw_sampler_default_color), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER, 0, /* variable key/data size */ 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT, sizeof(struct brw_wm_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG, sizeof(struct brw_sf_prog_key), sizeof(struct brw_sf_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_VP", BRW_SF_VP, sizeof(struct brw_sf_viewport), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT, sizeof(struct brw_sf_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT, sizeof(struct brw_vs_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG, sizeof(struct brw_vs_prog_key), sizeof(struct brw_vs_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT, sizeof(struct brw_clip_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG, sizeof(struct brw_clip_prog_key), sizeof(struct brw_clip_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT, sizeof(struct brw_gs_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG, sizeof(struct brw_gs_prog_key), sizeof(struct brw_gs_prog_data)); +#if 1 + brw_init_cache_id(cache, + "SS_SURFACE", + BRW_SS_SURFACE, + sizeof(struct brw_surface_state), + 0); + + brw_init_cache_id(cache, + "SS_SURF_BIND", + BRW_SS_SURF_BIND, + 0, + 0); +#endif +} + +static void +brw_init_surface_cache( struct brw_context *brw ) +{ + struct brw_cache *cache = &brw->surface_cache; - brw_init_cache_id(brw, + cache->brw = brw; + + cache->size = 7; + cache->n_items = 0; + cache->items = (struct brw_cache_item **) + _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); + + brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE, sizeof(struct brw_surface_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND, 0, 0); } +void brw_init_caches( struct brw_context *brw ) +{ + brw_init_non_surface_cache(brw); + brw_init_surface_cache(brw); +} + static void -brw_clear_cache( struct brw_context *brw ) +brw_clear_cache( struct brw_context *brw, struct brw_cache *cache ) { struct brw_cache_item *c, *next; GLuint i; @@ -457,8 +488,8 @@ brw_clear_cache( struct brw_context *brw ) if (INTEL_DEBUG & DEBUG_STATE) _mesa_printf("%s\n", __FUNCTION__); - for (i = 0; i < brw->cache.size; i++) { - for (c = brw->cache.items[i]; c; c = next) { + for (i = 0; i < cache->size; i++) { + for (c = cache->items[i]; c; c = next) { int j; next = c->next; @@ -468,10 +499,10 @@ brw_clear_cache( struct brw_context *brw ) free((void *)c->key); free(c); } - brw->cache.items[i] = NULL; + cache->items[i] = NULL; } - brw->cache.n_items = 0; + cache->n_items = 0; if (brw->curbe.last_buf) { _mesa_free(brw->curbe.last_buf); @@ -489,19 +520,30 @@ void brw_state_cache_check_size( struct brw_context *brw ) * 32k, so 1000 of them is around 1.5MB. */ if (brw->cache.n_items > 1000) - brw_clear_cache(brw); + brw_clear_cache(brw, &brw->cache); + + if (brw->surface_cache.n_items > 1000) + brw_clear_cache(brw, &brw->surface_cache); } -void brw_destroy_cache( struct brw_context *brw ) + +static void +brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) { GLuint i; - brw_clear_cache(brw); + brw_clear_cache(brw, cache); for (i = 0; i < BRW_MAX_CACHE; i++) { - dri_bo_unreference(brw->cache.last_bo[i]); - free(brw->cache.name[i]); + dri_bo_unreference(cache->last_bo[i]); + free(cache->name[i]); } - free(brw->cache.items); - brw->cache.items = NULL; - brw->cache.size = 0; + free(cache->items); + cache->items = NULL; + cache->size = 0; +} + +void brw_destroy_caches( struct brw_context *brw ) +{ + brw_destroy_cache(brw, &brw->cache); + brw_destroy_cache(brw, &brw->surface_cache); } diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 20892cdf322..2641bcb2aa8 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -100,13 +100,13 @@ const struct brw_tracked_state *atoms[] = void brw_init_state( struct brw_context *brw ) { - brw_init_cache(brw); + brw_init_caches(brw); } void brw_destroy_state( struct brw_context *brw ) { - brw_destroy_cache(brw); + brw_destroy_caches(brw); brw_destroy_batch_cache(brw); } -- cgit v1.2.3 From 4843e54fc69daf379dea9899673b3df92b44049c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 11:12:07 -0600 Subject: i965: actually use the new, second surface state cache --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 39 +++++++++++++----------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 89c456e62cc..74f3f1791e8 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -268,7 +268,7 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.cube_neg_z = 1; } - bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, &surf, sizeof(surf), @@ -321,10 +321,11 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.tiling = intelObj->mt->region->tiling; dri_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); if (brw->wm.surf_bo[surf] == NULL) { brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); } @@ -362,7 +363,7 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ - bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, &surf, sizeof(surf), @@ -427,7 +428,8 @@ brw_update_wm_constant_surface( GLcontext *ctx, */ dri_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, &key, sizeof(key), &key.bo, key.bo ? 1 : 0, NULL); @@ -484,7 +486,8 @@ brw_update_vs_constant_surface( GLcontext *ctx, */ dri_bo_unreference(brw->vs.surf_bo[surf]); - brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, &key, sizeof(key), &key.bo, key.bo ? 1 : 0, NULL); @@ -563,10 +566,11 @@ brw_update_renderbuffer_surface(struct brw_context *brw, dri_bo_unreference(brw->wm.surf_bo[unit]); brw->wm.surf_bo[unit] = NULL; if (cached) - brw->wm.surf_bo[unit] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - ®ion_bo, 1, - NULL); + brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + ®ion_bo, 1, + NULL); if (brw->wm.surf_bo[unit] == NULL) { struct brw_surface_state surf; @@ -591,7 +595,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, surf.ss0.writedisable_alpha = !key.color_mask[3]; /* Key size will never match key size for textures, so we're safe. */ - brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache, + BRW_SS_SURFACE, &key, sizeof(key), ®ion_bo, 1, &surf, sizeof(surf), @@ -623,7 +628,7 @@ brw_wm_get_binding_table(struct brw_context *brw) assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); - bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, + bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, NULL); @@ -639,7 +644,7 @@ brw_wm_get_binding_table(struct brw_context *brw) else data[i] = 0; - bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, + bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, data, data_size, @@ -739,7 +744,7 @@ brw_vs_get_binding_table(struct brw_context *brw) assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF); - bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, + bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->vs.surf_bo, brw->vs.nr_surfaces, NULL); @@ -755,7 +760,7 @@ brw_vs_get_binding_table(struct brw_context *brw) else data[i] = 0; - bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, + bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->vs.surf_bo, brw->vs.nr_surfaces, data, data_size, -- cgit v1.2.3 From c0c58cf5cfc11b9256287871660cc16966e662ef Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 11:28:24 -0600 Subject: i965: comments, reformatting --- src/mesa/drivers/dri/i965/brw_state_cache.c | 55 ++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 3b23a8b755a..cbae68798cd 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -56,9 +56,9 @@ * incorrect program is run for the other instance. */ +#include "main/imports.h" #include "brw_state.h" #include "intel_batchbuffer.h" -#include "main/imports.h" /* XXX: Fixme - have to include these to get the sizes of the prog_key * structs: @@ -69,8 +69,10 @@ #include "brw_sf.h" #include "brw_gs.h" -static GLuint hash_key( const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) + +static GLuint +hash_key(const void *key, GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs) { GLuint *ikey = (GLuint *)key; GLuint hash = 0, i; @@ -95,6 +97,7 @@ static GLuint hash_key( const void *key, GLuint key_size, return hash; } + /** * Marks a new buffer as being chosen for the given cache id. */ @@ -111,6 +114,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, cache->brw->state.dirty.cache |= 1 << cache_id; } + static struct brw_cache_item * search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, GLuint hash, const void *key, GLuint key_size, @@ -143,7 +147,8 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, } -static void rehash( struct brw_cache *cache ) +static void +rehash(struct brw_cache *cache) { struct brw_cache_item **items; struct brw_cache_item *c, *next; @@ -164,15 +169,17 @@ static void rehash( struct brw_cache *cache ) cache->size = size; } + /** * Returns the buffer object matching cache_id and key, or NULL. */ -dri_bo *brw_search_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs, - void *aux_return ) +dri_bo * +brw_search_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs, + void *aux_return) { struct brw_cache_item *item; GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); @@ -192,6 +199,7 @@ dri_bo *brw_search_cache( struct brw_cache *cache, return item->bo; } + dri_bo * brw_upload_cache( struct brw_cache *cache, enum brw_cache_id cache_id, @@ -265,7 +273,9 @@ brw_upload_cache( struct brw_cache *cache, return bo; } -/* This doesn't really work with aux data. Use search/upload instead + +/** + * This doesn't really work with aux data. Use search/upload instead */ dri_bo * brw_cache_data_sz(struct brw_cache *cache, @@ -296,6 +306,7 @@ brw_cache_data_sz(struct brw_cache *cache, return bo; } + /** * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. * @@ -319,6 +330,7 @@ enum pool_type { DW_GENERAL_STATE }; + static void brw_init_cache_id(struct brw_cache *cache, const char *name, @@ -333,7 +345,7 @@ brw_init_cache_id(struct brw_cache *cache, static void -brw_init_non_surface_cache( struct brw_context *brw ) +brw_init_non_surface_cache(struct brw_context *brw) { struct brw_cache *cache = &brw->cache; @@ -433,6 +445,7 @@ brw_init_non_surface_cache( struct brw_context *brw ) BRW_GS_PROG, sizeof(struct brw_gs_prog_key), sizeof(struct brw_gs_prog_data)); + #if 1 brw_init_cache_id(cache, "SS_SURFACE", @@ -448,8 +461,9 @@ brw_init_non_surface_cache( struct brw_context *brw ) #endif } + static void -brw_init_surface_cache( struct brw_context *brw ) +brw_init_surface_cache(struct brw_context *brw) { struct brw_cache *cache = &brw->surface_cache; @@ -473,14 +487,17 @@ brw_init_surface_cache( struct brw_context *brw ) 0); } -void brw_init_caches( struct brw_context *brw ) + +void +brw_init_caches(struct brw_context *brw) { brw_init_non_surface_cache(brw); brw_init_surface_cache(brw); } + static void -brw_clear_cache( struct brw_context *brw, struct brw_cache *cache ) +brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) { struct brw_cache_item *c, *next; GLuint i; @@ -514,7 +531,9 @@ brw_clear_cache( struct brw_context *brw, struct brw_cache *cache ) brw->state.dirty.cache |= ~0; } -void brw_state_cache_check_size( struct brw_context *brw ) + +void +brw_state_cache_check_size(struct brw_context *brw) { /* un-tuned guess. We've got around 20 state objects for a total of around * 32k, so 1000 of them is around 1.5MB. @@ -542,7 +561,9 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) cache->size = 0; } -void brw_destroy_caches( struct brw_context *brw ) + +void +brw_destroy_caches(struct brw_context *brw) { brw_destroy_cache(brw, &brw->cache); brw_destroy_cache(brw, &brw->surface_cache); -- cgit v1.2.3 From 21a422d97e501f4ca68ab24ad3fe5f5eb1393349 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 11:29:49 -0600 Subject: i965: remove old code to init surface-related cache IDs These types are only found in the new surface state cache now. --- src/mesa/drivers/dri/i965/brw_state_cache.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index cbae68798cd..320d886c992 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -445,20 +445,6 @@ brw_init_non_surface_cache(struct brw_context *brw) BRW_GS_PROG, sizeof(struct brw_gs_prog_key), sizeof(struct brw_gs_prog_data)); - -#if 1 - brw_init_cache_id(cache, - "SS_SURFACE", - BRW_SS_SURFACE, - sizeof(struct brw_surface_state), - 0); - - brw_init_cache_id(cache, - "SS_SURF_BIND", - BRW_SS_SURF_BIND, - 0, - 0); -#endif } -- cgit v1.2.3 From 5c8fb6acc10662c9e71078c9f273db6c7808e9ff Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 11:47:59 -0600 Subject: i965: define BRW_MAX_GRF --- src/mesa/drivers/dri/i965/brw_context.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index cad711d18ab..f0d4993e117 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -243,6 +243,9 @@ struct brw_vs_ouput_sizes { }; +/** Number of general purpose registers (VS, WM, etc) */ +#define BRW_MAX_GRF 128 + /** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 16 -- cgit v1.2.3 From ac22178eb049126003db40b0a77a111498a12ab7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 11:52:16 -0600 Subject: i965: enable VS constant buffers In the VS constants can now be handled in two different ways: 1. If there's room in the GRF, put constants there. They're preloaded from the CURBE prior to VS execution. This is the historical approach. The problem is the GRF may not have room for all the shader's constants and temps and misc registers. Hence... 2. Use a separate constant buffer which is read from using a READ message. This allows a very large number of constants and frees up GRF regs for shader temporaries. This is the new approach. May be a little slower than 1. 1 vs. 2 is chosen according to how many constants and temps the shader needs. --- src/mesa/drivers/dri/i965/brw_vs_emit.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 524f1211cee..1da5a3f5023 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -69,13 +69,17 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { GLuint i, reg = 0, mrf; -#if 0 - if (c->vp->program.Base.Parameters->NumParameters >= 6) - c->use_const_buffer = 1; + /* Determine whether to use a real constant buffer or use a block + * of GRF registers for constants. The later is faster but only + * works if everything fits in the GRF. + * XXX this heuristic/check may need some fine tuning... + */ + if (c->vp->program.Base.Parameters->NumParameters + + c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF) + c->use_const_buffer = GL_TRUE; else -#endif c->use_const_buffer = GL_FALSE; - /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/ + printf("use_const_buffer = %d\n", c->use_const_buffer); /* r0 -- reserved as usual */ -- cgit v1.2.3 From ebfbd8c4fef78e3cd9604660e5bb96e3c6df07e5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Apr 2009 13:46:58 -0600 Subject: i965: disable debug printf --- src/mesa/drivers/dri/i965/brw_vs_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 1da5a3f5023..c2b37027988 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -79,7 +79,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->use_const_buffer = GL_TRUE; else c->use_const_buffer = GL_FALSE; - printf("use_const_buffer = %d\n", c->use_const_buffer); + /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/ /* r0 -- reserved as usual */ -- cgit v1.2.3 From 027ed1b505a1bf6e3f5ad4412734d7edf337c08b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 24 Apr 2009 09:43:44 -0600 Subject: mesa: signal _NEW_PROGRAM_CONSTANTS instead of _NEW_PROGRAM Use _NEW_PROGRAM_CONSTANTS when changing constant/uniform buffer values. Binding a new program/shader sets both _NEW_PROGRAM and _NEW_PROGRAM_CONSTANTS. --- src/mesa/shader/arbprogram.c | 13 +++++++------ src/mesa/shader/nvprogram.c | 2 +- src/mesa/shader/shader_api.c | 6 +++--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/mesa/shader/arbprogram.c b/src/mesa/shader/arbprogram.c index 981565ab8f1..317d623a228 100644 --- a/src/mesa/shader/arbprogram.c +++ b/src/mesa/shader/arbprogram.c @@ -74,8 +74,6 @@ _mesa_BindProgram(GLenum target, GLuint id) GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); - FLUSH_VERTICES(ctx, _NEW_PROGRAM); - /* Error-check target and get curProg */ if ((target == GL_VERTEX_PROGRAM_ARB) && /* == GL_VERTEX_PROGRAM_NV */ (ctx->Extensions.NV_vertex_program || @@ -132,6 +130,9 @@ _mesa_BindProgram(GLenum target, GLuint id) return; } + /* signal new program (and its new constants) */ + FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + /* bind newProg */ if (target == GL_VERTEX_PROGRAM_ARB) { /* == GL_VERTEX_PROGRAM_NV */ _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current, @@ -489,7 +490,7 @@ _mesa_ProgramEnvParameter4fARB(GLenum target, GLuint index, GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); - FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); if (target == GL_FRAGMENT_PROGRAM_ARB && ctx->Extensions.ARB_fragment_program) { @@ -537,7 +538,7 @@ _mesa_ProgramEnvParameters4fvEXT(GLenum target, GLuint index, GLsizei count, GLfloat * dest; ASSERT_OUTSIDE_BEGIN_END(ctx); - FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); if (count <= 0) { _mesa_error(ctx, GL_INVALID_VALUE, "glProgramEnvParameters4fv(count)"); @@ -631,7 +632,7 @@ _mesa_ProgramLocalParameter4fARB(GLenum target, GLuint index, struct gl_program *prog; ASSERT_OUTSIDE_BEGIN_END(ctx); - FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); if ((target == GL_FRAGMENT_PROGRAM_NV && ctx->Extensions.NV_fragment_program) || @@ -685,7 +686,7 @@ _mesa_ProgramLocalParameters4fvEXT(GLenum target, GLuint index, GLsizei count, GLint i; ASSERT_OUTSIDE_BEGIN_END(ctx); - FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); if (count <= 0) { _mesa_error(ctx, GL_INVALID_VALUE, "glProgramLocalParameters4fv(count)"); diff --git a/src/mesa/shader/nvprogram.c b/src/mesa/shader/nvprogram.c index 5142c2a4a59..8ba521182b8 100644 --- a/src/mesa/shader/nvprogram.c +++ b/src/mesa/shader/nvprogram.c @@ -706,7 +706,7 @@ _mesa_ProgramNamedParameter4fNV(GLuint id, GLsizei len, const GLubyte *name, GET_CURRENT_CONTEXT(ctx); ASSERT_OUTSIDE_BEGIN_END(ctx); - FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); prog = _mesa_lookup_program(ctx, id); if (!prog || prog->Target != GL_FRAGMENT_PROGRAM_NV) { diff --git a/src/mesa/shader/shader_api.c b/src/mesa/shader/shader_api.c index 644cd39185c..8f414a08891 100644 --- a/src/mesa/shader/shader_api.c +++ b/src/mesa/shader/shader_api.c @@ -1487,7 +1487,7 @@ _mesa_use_program(GLcontext *ctx, GLuint program) return; } - FLUSH_VERTICES(ctx, _NEW_PROGRAM); + FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); if (program) { shProg = _mesa_lookup_shader_program_err(ctx, program, "glUseProgram"); @@ -1789,7 +1789,7 @@ _mesa_uniform(GLcontext *ctx, GLint location, GLsizei count, return; } - FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); uniform = &shProg->Uniforms->Uniforms[location]; @@ -1929,7 +1929,7 @@ _mesa_uniform_matrix(GLcontext *ctx, GLint cols, GLint rows, return; } - FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + FLUSH_VERTICES(ctx, _NEW_PROGRAM_CONSTANTS); uniform = &shProg->Uniforms->Uniforms[location]; -- cgit v1.2.3 From 7da3f9403b235394a5c7e9456e34a0c9dad7dd15 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 24 Apr 2009 16:28:36 -0600 Subject: mesa: refactor code and make _mesa_find_temp_intervals() public --- src/mesa/shader/prog_optimize.c | 154 ++++++++++++++++++++++++++++++++++------ src/mesa/shader/prog_optimize.h | 12 ++++ 2 files changed, 144 insertions(+), 22 deletions(-) diff --git a/src/mesa/shader/prog_optimize.c b/src/mesa/shader/prog_optimize.c index 6ba2e76ff96..a02f5efa417 100644 --- a/src/mesa/shader/prog_optimize.c +++ b/src/mesa/shader/prog_optimize.c @@ -547,15 +547,13 @@ update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic) /** - * Find the live intervals for each temporary register in the program. - * For register R, the interval [A,B] indicates that R is referenced - * from instruction A through instruction B. - * Special consideration is needed for loops and subroutines. - * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason + * Find first/last instruction that references each temporary register. */ -static GLboolean -find_live_intervals(struct gl_program *prog, - struct interval_list *liveIntervals) +GLboolean +_mesa_find_temp_intervals(const struct prog_instruction *instructions, + GLuint numInstructions, + GLint intBegin[MAX_PROGRAM_TEMPS], + GLint intEnd[MAX_PROGRAM_TEMPS]) { struct loop_info { @@ -563,26 +561,15 @@ find_live_intervals(struct gl_program *prog, }; struct loop_info loopStack[MAX_LOOP_NESTING]; GLuint loopStackDepth = 0; - GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS]; GLuint i; - /* - * Note: we'll return GL_FALSE below if we find relative indexing - * into the TEMP register file. We can't handle that yet. - * We also give up on subroutines for now. - */ - - if (dbg) { - _mesa_printf("Optimize: Begin find intervals\n"); - } - for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ intBegin[i] = intEnd[i] = -1; } /* Scan instructions looking for temporary registers */ - for (i = 0; i < prog->NumInstructions; i++) { - const struct prog_instruction *inst = prog->Instructions + i; + for (i = 0; i < numInstructions; i++) { + const struct prog_instruction *inst = instructions + i; if (inst->Opcode == OPCODE_BGNLOOP) { loopStack[loopStackDepth].Start = i; loopStack[loopStackDepth].End = inst->BranchTarget; @@ -595,7 +582,7 @@ find_live_intervals(struct gl_program *prog, return GL_FALSE; } else { - const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/ GLuint j; for (j = 0; j < numSrc; j++) { if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { @@ -624,6 +611,39 @@ find_live_intervals(struct gl_program *prog, } } + return GL_TRUE; +} + + +/** + * Find the live intervals for each temporary register in the program. + * For register R, the interval [A,B] indicates that R is referenced + * from instruction A through instruction B. + * Special consideration is needed for loops and subroutines. + * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason + */ +static GLboolean +find_live_intervals(struct gl_program *prog, + struct interval_list *liveIntervals) +{ + GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS]; + GLuint i; + + /* + * Note: we'll return GL_FALSE below if we find relative indexing + * into the TEMP register file. We can't handle that yet. + * We also give up on subroutines for now. + */ + + if (dbg) { + _mesa_printf("Optimize: Begin find intervals\n"); + } + + /* build intermediate arrays */ + if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions, + intBegin, intEnd)) + return GL_FALSE; + /* Build live intervals list from intermediate arrays */ liveIntervals->Num = 0; for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { @@ -794,6 +814,96 @@ _mesa_reallocate_registers(struct gl_program *prog) + + + + +#if 0 +static void +_mesa_find_temporary_live_intervals(struct gl_program *prog, + GLint firstInst[MAX_PROGRAM_TEMPS], + GLint lastInst[MAX_PROGRAM_TEMPS]) +{ + GLuint i; + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + firstInst[i] = lastInst[i] = -1; + } + + struct loop_info loopStack[MAX_LOOP_NESTING]; + GLuint loopStackDepth = 0; + GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS]; + GLuint i; + + /* + * Note: we'll return GL_FALSE below if we find relative indexing + * into the TEMP register file. We can't handle that yet. + * We also give up on subroutines for now. + */ + + if (dbg) { + _mesa_printf("Optimize: Begin find intervals\n"); + } + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ + intBegin[i] = intEnd[i] = -1; + } + + /* Scan instructions looking for temporary registers */ + for (i = 0; i < prog->NumInstructions; i++) { + const struct prog_instruction *inst = prog->Instructions + i; + if (inst->Opcode == OPCODE_BGNLOOP) { + loopStack[loopStackDepth].Start = i; + loopStack[loopStackDepth].End = inst->BranchTarget; + loopStackDepth++; + } + else if (inst->Opcode == OPCODE_ENDLOOP) { + loopStackDepth--; + } + else if (inst->Opcode == OPCODE_CAL) { + return GL_FALSE; + } + else { + const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + const GLuint index = inst->SrcReg[j].Index; + if (inst->SrcReg[j].RelAddr) + return GL_FALSE; + update_interval(intBegin, intEnd, index, i); + if (loopStackDepth > 0) { + /* extend temp register's interval to end of loop */ + GLuint loopEnd = loopStack[loopStackDepth - 1].End; + update_interval(intBegin, intEnd, index, loopEnd); + } + } + } + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + const GLuint index = inst->DstReg.Index; + if (inst->DstReg.RelAddr) + return GL_FALSE; + update_interval(intBegin, intEnd, index, i); + if (loopStackDepth > 0) { + /* extend temp register's interval to end of loop */ + GLuint loopEnd = loopStack[loopStackDepth - 1].End; + update_interval(intBegin, intEnd, index, loopEnd); + } + } + } + } + + + + +#endif + + + + + + + /** * Apply optimizations to the given program to eliminate unnecessary * instructions, temp regs, etc. diff --git a/src/mesa/shader/prog_optimize.h b/src/mesa/shader/prog_optimize.h index d102cfd9fc1..43894a27237 100644 --- a/src/mesa/shader/prog_optimize.h +++ b/src/mesa/shader/prog_optimize.h @@ -25,7 +25,19 @@ #ifndef PROG_OPT_H #define PROG_OPT_H + +#include "main/config.h" + + struct gl_program; +struct prog_instruction; + + +extern GLboolean +_mesa_find_temp_intervals(const struct prog_instruction *instructions, + GLuint numInstructions, + GLint intBegin[MAX_PROGRAM_TEMPS], + GLint intEnd[MAX_PROGRAM_TEMPS]); extern void _mesa_optimize_program(GLcontext *ctx, struct gl_program *program); -- cgit v1.2.3 From b58b3a786aa38dcc9d72144c2cc691151e46e3d5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 24 Apr 2009 16:33:46 -0600 Subject: i965: rework GLSL/WM register allocation Use a bitvector of used/free flags. If we run out of temps, examine the live intervals of the temp regs in the program and free those which are no longer alive. Also, enable the new WM const buffer code. --- src/mesa/drivers/dri/i965/brw_wm.h | 5 +- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 211 +++++++++++++++++++++++++------- 2 files changed, 168 insertions(+), 48 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index d0ab3bdc65f..75205fddb7f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -240,15 +240,18 @@ struct brw_wm_compile { GLuint max_wm_grf; GLuint last_scratch; + GLuint cur_inst; /**< index of current instruction */ + /** Mapping from Mesa registers to hardware registers */ struct { GLboolean inited; struct brw_reg reg; } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + GLboolean used_grf[BRW_WM_MAX_GRF]; + GLuint first_free_grf; struct brw_reg stack; struct brw_reg emit_mask_reg; - GLuint reg_index; /**< Index of next free GRF register */ GLuint tmp_regs[BRW_WM_MAX_GRF]; GLuint tmp_index; GLuint tmp_max; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 22e17622c6d..3471c1946ec 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -1,5 +1,7 @@ #include "main/macros.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_optimize.h" #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" @@ -42,6 +44,76 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) } + +static void +reclaim_temps(struct brw_wm_compile *c); + + +/** Mark GRF register as used. */ +static void +prealloc_grf(struct brw_wm_compile *c, int r) +{ + c->used_grf[r] = GL_TRUE; +} + + +/** Mark given GRF register as not in use. */ +static void +release_grf(struct brw_wm_compile *c, int r) +{ + /*assert(c->used_grf[r]);*/ + c->used_grf[r] = GL_FALSE; + c->first_free_grf = MIN2(c->first_free_grf, r); +} + + +/** Return index of a free GRF, mark it as used. */ +static int +alloc_grf(struct brw_wm_compile *c) +{ + GLuint r; + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + /* no free temps, try to reclaim some */ + reclaim_temps(c); + c->first_free_grf = 0; + + /* try alloc again */ + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + for (r = 0; r < BRW_WM_MAX_GRF; r++) { + assert(c->used_grf[r]); + } + /*printf("Really out of temp regs!\n");*/ + return 60; +} + + +/** Return number of GRF registers used */ +static int +num_grf_used(const struct brw_wm_compile *c) +{ + int r; + for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--) + if (c->used_grf[r]) + return r + 1; + return 0; +} + + + /** * Record the mapping of a Mesa register to a hardware register. */ @@ -68,11 +140,18 @@ static int get_scalar_dst_index(const struct prog_instruction *inst) static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { struct brw_reg reg; - if(c->tmp_index == c->tmp_max) - c->tmp_regs[ c->tmp_max++ ] = c->reg_index++; - + + /* if we need to allocate another temp, grow the tmp_regs[] array */ + if (c->tmp_index == c->tmp_max) { + c->tmp_regs[ c->tmp_max++ ] = alloc_grf(c); + } + + /* form the GRF register */ reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); + /*printf("alloc_temp %d\n", reg.nr);*/ + assert(reg.nr < BRW_WM_MAX_GRF); return reg; + } /** @@ -130,35 +209,26 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, return brw_null_reg(); } + assert(index < 256); /* see if we've already allocated a HW register for this Mesa register */ if (c->wm_regs[file][index][component].inited) { - /* yes, re-use */ - reg = c->wm_regs[file][index][component].reg; + /* yes, re-use */ + reg = c->wm_regs[file][index][component].reg; } else { /* no, allocate new register */ - reg = brw_vec8_grf(c->reg_index, 0); - } + int grf = alloc_grf(c); + if (grf < 0) { + /* totally out of temps */ + grf = 70; /* XXX !!!! */ + } - /* if this is a new register allocation, record it in the table */ - if (!c->wm_regs[file][index][component].inited) { - set_reg(c, file, index, component, reg); - c->reg_index++; - } + reg = brw_vec8_grf(grf, 0); + /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/ - if (c->reg_index >= BRW_WM_MAX_GRF - 12) { - /* ran out of temporary registers! */ -#if 1 - /* This is a big hack for now. - * Return bad register index, just don't hang the GPU. - */ - _mesa_fprintf(stderr, "out of regs %d\n", c->reg_index); - c->reg_index = BRW_WM_MAX_GRF - 13; -#else - return brw_null_reg(); -#endif + set_reg(c, file, index, component, reg); } - + if (neg & (1 << component)) { reg = negate(reg); } @@ -168,6 +238,46 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, } + +/** + * This is called if we run out of GRF registers. Examine the live intervals + * of temp regs in the program and free those which won't be used again. + */ +static void +reclaim_temps(struct brw_wm_compile *c) +{ + GLint intBegin[MAX_PROGRAM_TEMPS]; + GLint intEnd[MAX_PROGRAM_TEMPS]; + int index; + + /*printf("Reclaim temps:\n");*/ + + _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns, + intBegin, intEnd); + + for (index = 0; index < MAX_PROGRAM_TEMPS; index++) { + if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { + /* program temp[i] can be freed */ + int component; + /*printf(" temp[%d] is dead\n", index);*/ + for (component = 0; component < 4; component++) { + if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) { + int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr; + release_grf(c, r); + /* + printf(" Reclaim temp %d, reg %d at inst %d\n", + index, r, c->cur_inst); + */ + c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE; + } + } + } + } +} + + + + /** * Preallocate registers. This sets up the Mesa to hardware register * mapping for certain registers, such as constants (uniforms/state vars) @@ -179,6 +289,10 @@ static void prealloc_reg(struct brw_wm_compile *c) struct brw_reg reg; int nr_interp_regs = 0; GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; + GLuint reg_index = 0; + + memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); + c->first_free_grf = 0; for (i = 0; i < 4; i++) { if (i < c->key.nr_depth_regs) @@ -187,14 +301,20 @@ static void prealloc_reg(struct brw_wm_compile *c) reg = brw_vec8_grf(0, 0); set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); } - c->reg_index += 2 * c->key.nr_depth_regs; + reg_index += 2 * c->key.nr_depth_regs; /* constants */ { - const int nr_params = c->fp->program.Base.Parameters->NumParameters; + const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters; + const GLuint nr_temps = c->fp->program.Base.NumTemporaries; /* use a real constant buffer, or just use a section of the GRF? */ - c->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/ + /* XXX this heuristic may need adjustment... */ + if ((nr_params + nr_temps) * 4 + reg_index > 80) + c->use_const_buffer = GL_TRUE; + else + c->use_const_buffer = GL_FALSE; + /*printf("WM use_const_buffer = %d\n", c->use_const_buffer);*/ if (c->use_const_buffer) { /* We'll use a real constant buffer and fetch constants from @@ -216,7 +336,7 @@ static void prealloc_reg(struct brw_wm_compile *c) for (i = 0; i < nr_params; i++) { /* loop over XYZW channels */ for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); + reg = brw_vec1_grf(reg_index + index / 8, index % 8); /* Save pointer to parameter/constant value. * Constants will be copied in prepare_constant_buffer() */ @@ -226,7 +346,7 @@ static void prealloc_reg(struct brw_wm_compile *c) } /* number of constant regs used (each reg is float[8]) */ c->nr_creg = 2 * ((4 * nr_params + 15) / 16); - c->reg_index += c->nr_creg; + reg_index += c->nr_creg; } } @@ -234,20 +354,24 @@ static void prealloc_reg(struct brw_wm_compile *c) for (i = 0; i < FRAG_ATTRIB_MAX; i++) { if (inputs & (1<reg_index, 0); + reg = brw_vec8_grf(reg_index, 0); for (j = 0; j < 4; j++) set_reg(c, PROGRAM_PAYLOAD, i, j, reg); - c->reg_index += 2; + reg_index += 2; } } c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; c->prog_data.urb_read_length = nr_interp_regs * 2; c->prog_data.curb_read_length = c->nr_creg; - c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index++; - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index += 2; + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index++; + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index += 2; + + /* mark GRF regs [0..reg_index-1] as in-use */ + for (i = 0; i < reg_index; i++) + prealloc_grf(c, i); /* An instruction may reference up to three constants. * They'll be found in these registers. @@ -256,13 +380,9 @@ static void prealloc_reg(struct brw_wm_compile *c) if (c->use_const_buffer) { for (i = 0; i < 3; i++) { c->current_const[i].index = -1; - c->current_const[i].reg = alloc_tmp(c); + c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); } } -#if 0 - printf("USE CONST BUFFER? %d\n", c->use_const_buffer); - printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index); -#endif } @@ -2595,7 +2715,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) struct brw_compile *p = &c->func; struct brw_indirect stack_index = brw_indirect(0, 0); - c->reg_index = 0; prealloc_reg(c); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); @@ -2603,6 +2722,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) for (i = 0; i < c->nr_fp_insns; i++) { const struct prog_instruction *inst = &c->prog_instructions[i]; + c->cur_inst = i; + #if 0 _mesa_printf("Inst %d: ", i); _mesa_print_instruction(inst); @@ -2833,17 +2954,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) _mesa_printf("unsupported IR in fragment shader %d\n", inst->Opcode); } + if (inst->CondUpdate) brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); else brw_set_predicate_control(p, BRW_PREDICATE_NONE); } post_wm_emit(c); - - if (c->reg_index >= BRW_WM_MAX_GRF) { - _mesa_problem(NULL, "Ran out of registers in brw_wm_emit_glsl()"); - /* XXX we need to do some proper error recovery here */ - } } @@ -2867,6 +2984,6 @@ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) brw_wm_print_program(c, "brw_wm_glsl_emit done"); } - c->prog_data.total_grf = c->reg_index; + c->prog_data.total_grf = num_grf_used(c); c->prog_data.total_scratch = 0; } -- cgit v1.2.3 From 777b9ff43e88e456d686208c83712f26aba2dd95 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 27 Apr 2009 10:45:41 -0600 Subject: i965: only upload constant buffer data when we actually need the const buffer Make the use_const_buffer field per-program and only call the code which updates the constant buffer's data if the flag is set. This should undo the perf regression from 20f3497e4b6756e330f7b3f54e8acaa1d6c92052 (cherry picked from master, commit dc9705d12d162ba6d087eb762e315de9f97bc456) --- src/mesa/drivers/dri/i965/brw_context.h | 2 ++ src/mesa/drivers/dri/i965/brw_curbe.c | 6 ++++-- src/mesa/drivers/dri/i965/brw_vs.h | 2 -- src/mesa/drivers/dri/i965/brw_vs_emit.c | 11 ++++++----- src/mesa/drivers/dri/i965/brw_wm.h | 2 -- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 16 ++++++++++------ 6 files changed, 22 insertions(+), 17 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index f0d4993e117..838e718d0d5 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -161,6 +161,7 @@ struct brw_vertex_program { struct gl_vertex_program program; GLuint id; dri_bo *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; }; @@ -171,6 +172,7 @@ struct brw_fragment_program { GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ dri_bo *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; }; diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index da746e4aa05..e6e26cdc406 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -368,7 +368,8 @@ update_vertex_constant_buffer(struct brw_context *brw) printf("update VS constants in buffer %p vp = %p\n", vp->const_buffer, vp); printf("program %u\n", vp->program.Base.Id); } - update_constant_buffer(brw, vp->program.Base.Parameters, vp->const_buffer); + if (vp->use_const_buffer) + update_constant_buffer(brw, vp->program.Base.Parameters, vp->const_buffer); } @@ -382,7 +383,8 @@ update_fragment_constant_buffer(struct brw_context *brw) printf("update WM constants in buffer %p\n", fp->const_buffer); printf("program %u\n", fp->program.Base.Id); } - update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer); + if (fp->use_const_buffer) + update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer); } diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index d20cf78b8af..1e4f66091e3 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -75,8 +75,6 @@ struct brw_vs_compile { struct brw_reg userplane[6]; - /** using a real constant buffer? */ - GLboolean use_const_buffer; /** we may need up to 3 constants per instruction (if use_const_buffer) */ struct { GLint index; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index c2b37027988..b9a338b1cd8 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -76,9 +76,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ if (c->vp->program.Base.Parameters->NumParameters + c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF) - c->use_const_buffer = GL_TRUE; + c->vp->use_const_buffer = GL_TRUE; else - c->use_const_buffer = GL_FALSE; + c->vp->use_const_buffer = GL_FALSE; + /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/ /* r0 -- reserved as usual @@ -100,7 +101,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Vertex program parameters from curbe: */ - if (c->use_const_buffer) { + if (c->vp->use_const_buffer) { /* get constants from a real constant buffer */ c->prog_data.curb_read_length = 0; c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */ @@ -176,7 +177,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) reg++; } - if (c->use_const_buffer) { + if (c->vp->use_const_buffer) { for (i = 0; i < 3; i++) { c->current_const[i].index = -1; c->current_const[i].reg = brw_vec8_grf(reg, 0); @@ -873,7 +874,7 @@ get_src_reg( struct brw_vs_compile *c, case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: case PROGRAM_UNIFORM: - if (c->use_const_buffer) { + if (c->vp->use_const_buffer) { return get_constant(c, inst, argIndex); } else if (relAddr) { diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 75205fddb7f..2f80a60c12b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -257,8 +257,6 @@ struct brw_wm_compile { GLuint tmp_max; GLuint subroutines[BRW_WM_MAX_SUBROUTINE]; - /** using a real constant buffer? */ - GLboolean use_const_buffer; /** we may need up to 3 constants per instruction (if use_const_buffer) */ struct { GLint index; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 3471c1946ec..eca4ca2c821 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -311,12 +311,12 @@ static void prealloc_reg(struct brw_wm_compile *c) /* use a real constant buffer, or just use a section of the GRF? */ /* XXX this heuristic may need adjustment... */ if ((nr_params + nr_temps) * 4 + reg_index > 80) - c->use_const_buffer = GL_TRUE; + c->fp->use_const_buffer = GL_TRUE; else - c->use_const_buffer = GL_FALSE; + c->fp->use_const_buffer = GL_FALSE; /*printf("WM use_const_buffer = %d\n", c->use_const_buffer);*/ - if (c->use_const_buffer) { + if (c->fp->use_const_buffer) { /* We'll use a real constant buffer and fetch constants from * it with a dataport read message. */ @@ -377,12 +377,16 @@ static void prealloc_reg(struct brw_wm_compile *c) * They'll be found in these registers. * XXX alloc these on demand! */ - if (c->use_const_buffer) { + if (c->fp->use_const_buffer) { for (i = 0; i < 3; i++) { c->current_const[i].index = -1; c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); } } +#if 0 + printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer); + printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index); +#endif } @@ -488,7 +492,7 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, const GLuint nr = 1; const GLuint component = GET_SWZ(src->Swizzle, channel); - if (c->use_const_buffer && + if (c->fp->use_const_buffer && (src->File == PROGRAM_STATE_VAR || src->File == PROGRAM_CONSTANT || src->File == PROGRAM_UNIFORM)) { @@ -2730,7 +2734,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) #endif /* fetch any constants that this instruction needs */ - if (c->use_const_buffer) + if (c->fp->use_const_buffer) fetch_constants(c, inst); if (inst->CondUpdate) -- cgit v1.2.3 From dd4802176f7751e8c38c000687ff9cb9633649aa Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 27 Apr 2009 10:46:30 -0600 Subject: i965: #include prog_print.h to silence warning --- src/mesa/drivers/dri/i965/brw_curbe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index e6e26cdc406..f6d2014fb1e 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -36,6 +36,7 @@ #include "main/macros.h" #include "main/enums.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" #include "shader/prog_statevars.h" #include "intel_batchbuffer.h" #include "intel_regions.h" -- cgit v1.2.3 From dca190e9432d4ed122bdd534922d0c3d85791c6a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Apr 2009 11:52:06 -0600 Subject: mesa: added _mesa_check_soa_dependencies() function This function will check an instruction to see if there's data dependencies between the dst and src registers if executed in an SOA manner. --- src/mesa/shader/prog_instruction.c | 50 ++++++++++++++++++++++++++++++++++++++ src/mesa/shader/prog_instruction.h | 3 +++ 2 files changed, 53 insertions(+) diff --git a/src/mesa/shader/prog_instruction.c b/src/mesa/shader/prog_instruction.c index ca7565c0911..ae3a003feed 100644 --- a/src/mesa/shader/prog_instruction.c +++ b/src/mesa/shader/prog_instruction.c @@ -285,6 +285,56 @@ _mesa_is_tex_instruction(gl_inst_opcode opcode) } +/** + * Check if there's a potential src/dst register data dependency when + * using SOA execution. + * Example: + * MOV T, T.yxwz; + * This would expand into: + * MOV t0, t1; + * MOV t1, t0; + * MOV t2, t3; + * MOV t3, t2; + * The second instruction will have the wrong value for t0 if executed as-is. + */ +GLboolean +_mesa_check_soa_dependencies(const struct prog_instruction *inst) +{ + GLuint i, chan; + + if (inst->DstReg.WriteMask == WRITEMASK_X || + inst->DstReg.WriteMask == WRITEMASK_Y || + inst->DstReg.WriteMask == WRITEMASK_Z || + inst->DstReg.WriteMask == WRITEMASK_W || + inst->DstReg.WriteMask == 0x0) { + /* no chance of data dependency */ + return GL_FALSE; + } + + /* loop over src regs */ + for (i = 0; i < 3; i++) { + if (inst->SrcReg[i].File == inst->DstReg.File && + inst->SrcReg[i].Index == inst->DstReg.Index) { + /* loop over dest channels */ + GLuint channelsWritten = 0x0; + for (chan = 0; chan < 4; chan++) { + if (inst->DstReg.WriteMask & (1 << chan)) { + /* check if we're reading a channel that's been written */ + GLuint swizzle = GET_SWZ(inst->SrcReg[i].Swizzle, chan); + if (swizzle <= SWIZZLE_W && + (channelsWritten & (1 << swizzle))) { + return GL_TRUE; + } + + channelsWritten |= (1 << chan); + } + } + } + } + return GL_FALSE; +} + + /** * Return string name for given program opcode. */ diff --git a/src/mesa/shader/prog_instruction.h b/src/mesa/shader/prog_instruction.h index 3109f6cbae5..40ad998f79d 100644 --- a/src/mesa/shader/prog_instruction.h +++ b/src/mesa/shader/prog_instruction.h @@ -428,6 +428,9 @@ _mesa_num_inst_dst_regs(gl_inst_opcode opcode); extern GLboolean _mesa_is_tex_instruction(gl_inst_opcode opcode); +extern GLboolean +_mesa_check_soa_dependencies(const struct prog_instruction *inst); + extern const char * _mesa_opcode_string(gl_inst_opcode opcode); -- cgit v1.2.3