diff options
author | Kenneth Graunke <[email protected]> | 2012-08-24 01:22:58 -0700 |
---|---|---|
committer | Kenneth Graunke <[email protected]> | 2012-08-25 12:01:10 -0700 |
commit | 85e8e9e000732908b259a7e2cbc1724a1be2d447 (patch) | |
tree | 699e7f922f3091087cb158bbbcc8116b526cbfb5 | |
parent | 2faa592e7f54ef21b799b61ffa50c6bc8039ddc1 (diff) |
i965: Use linker-assigned sampler IDs in instruction encoding.
When assigning uniform locations, the linker assigns each sampler
uniform a sequential numerical ID. gl_shader_program::SamplerUnits maps
these sampler variable IDs to the actual texture units they reference
(specified via glUniform1i).
Previously, we encoded this mapping in the SEND instruction encoding:
the "sampler" was the texture unit number, and the binding table index
was SURF_INDEX_TEXTURE(the texture unit number). This unfortunately
meant that whenever the application changed the value of a sampler
uniform, we had to recompile the shader to change the SEND instructions.
This was horrible for the game Cogs, which repeatedly switches between
using texture unit 0 and 1. It also made fragment shader precompiles
useless: we'd do the precompile at glLinkShader() time, before the
application called glUniform1i to set the sampler values. As soon as
it did that, we'd have to recompile, wasting time and space in the
program cache.
This patch encodes the SamplerUnits indirection in the binding table,
sampler state, and sampler default color tables. Instead of baking the
texture unit number into the shader, we bake in the sampler variable ID
assigned by the linker. Since those never change, we don't need to
recompile programs on uniform changes.
This does mean that the tables now depend on the linked shader program
being used for rendering, rather than simply representing all available
texture units. This could cause an increase in state emission.
Another plus is that the sampler state and sampler default color tables
are now compact: we only emit as many entries as there are sampler
uniforms, with no holes in the table since the new sampler IDs are
sequential. Previously we had to emit a full 16 entries every time,
since the tables tracked the state of all active texture units.
Signed-off-by: Kenneth Graunke <[email protected]>
Acked-by: Paul Berry <[email protected]>
Reviewed-by: Eric Anholt <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_sampler_state.c | 27 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 51 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_sampler_state.c | 27 |
5 files changed, 74 insertions, 35 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 601f83c073a..e3d3a98bbdf 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1345,7 +1345,7 @@ fs_visitor::visit(ir_texture *ir) if (ir->offset != NULL && ir->op != ir_txf) inst->texture_offset = brw_texture_offset(ir->offset->as_constant()); - inst->sampler = texunit; + inst->sampler = sampler; if (ir->shadow_comparitor) inst->shadow_compare = true; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index c22ba6067cc..629ecb0b6ea 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1897,7 +1897,7 @@ vec4_visitor::visit(ir_texture *ir) inst->header_present = ir->offset || intel->gen < 5; inst->base_mrf = 2; inst->mlen = inst->header_present + 1; /* always at least one */ - inst->sampler = texunit; + inst->sampler = sampler; inst->dst = dst_reg(this, ir->type); inst->shadow_compare = ir->shadow_comparitor != NULL; diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 10deb1da0fb..610ef345f42 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -334,13 +334,14 @@ brw_upload_samplers(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; struct brw_sampler_state *samplers; - int i; - brw->sampler.count = 0; - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) - brw->sampler.count = i + 1; - } + /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM */ + struct gl_program *vs = (struct gl_program *) brw->vertex_program; + struct gl_program *fs = (struct gl_program *) brw->fragment_program; + + GLbitfield SamplersUsed = vs->SamplersUsed | fs->SamplersUsed; + + brw->sampler.count = _mesa_bitcount(SamplersUsed); if (brw->sampler.count == 0) return; @@ -350,9 +351,13 @@ brw_upload_samplers(struct brw_context *brw) 32, &brw->sampler.offset); memset(samplers, 0, brw->sampler.count * sizeof(*samplers)); - for (i = 0; i < brw->sampler.count; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) - brw_update_sampler_state(brw, i, i, &samplers[i]); + for (unsigned s = 0; s < brw->sampler.count; s++) { + if (SamplersUsed & (1 << s)) { + const unsigned unit = (fs->SamplersUsed & (1 << s)) ? + fs->SamplerUnits[s] : vs->SamplerUnits[s]; + if (ctx->Texture.Unit[unit]._ReallyEnabled) + brw_update_sampler_state(brw, unit, s, &samplers[s]); + } } brw->state.dirty.cache |= CACHE_NEW_SAMPLER; @@ -361,7 +366,9 @@ brw_upload_samplers(struct brw_context *brw) const struct brw_tracked_state brw_samplers = { .dirty = { .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH, + .brw = BRW_NEW_BATCH | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_FRAGMENT_PROGRAM, .cache = 0 }, .emit = brw_upload_samplers, diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 0c87b841d71..eefa427f1a5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1238,22 +1238,45 @@ const struct brw_tracked_state gen6_renderbuffer_surfaces = { static void brw_update_texture_surfaces(struct brw_context *brw) { - struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; + + /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM: + * Unfortunately, we're stuck using the gl_program structs until the + * ARB_fragment_program front-end gets converted to GLSL IR. These + * have the downside that SamplerUnits is split and only contains the + * mappings for samplers active in that stage. + */ + struct gl_program *vs = (struct gl_program *) brw->vertex_program; + struct gl_program *fs = (struct gl_program *) brw->fragment_program; - for (unsigned i = 0; i < BRW_MAX_TEX_UNIT; i++) { - const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; - const GLuint surf = SURF_INDEX_TEXTURE(i); + unsigned num_samplers = _mesa_bitcount(vs->SamplersUsed | fs->SamplersUsed); - /* _NEW_TEXTURE */ - if (texUnit->_ReallyEnabled) { - brw->intel.vtbl.update_texture_surface(ctx, i, brw->wm.surf_offset, surf); - } else { - brw->wm.surf_offset[surf] = 0; + for (unsigned s = 0; s < num_samplers; s++) { + brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(s)] = 0; + brw->wm.surf_offset[SURF_INDEX_TEXTURE(s)] = 0; + + if (vs->SamplersUsed & (1 << s)) { + const unsigned unit = vs->SamplerUnits[s]; + + /* _NEW_TEXTURE */ + if (ctx->Texture.Unit[unit]._ReallyEnabled) { + intel->vtbl.update_texture_surface(ctx, unit, + brw->vs.surf_offset, + SURF_INDEX_VS_TEXTURE(s)); + } } - /* For now, just mirror the texture setup to the VS slots. */ - brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(i)] = - brw->wm.surf_offset[surf]; + if (fs->SamplersUsed & (1 << s)) { + const unsigned unit = fs->SamplerUnits[s]; + + /* _NEW_TEXTURE */ + if (ctx->Texture.Unit[unit]._ReallyEnabled) { + intel->vtbl.update_texture_surface(ctx, unit, + brw->wm.surf_offset, + SURF_INDEX_TEXTURE(s)); + } + } } brw->state.dirty.brw |= BRW_NEW_SURFACES; @@ -1262,7 +1285,9 @@ brw_update_texture_surfaces(struct brw_context *brw) const struct brw_tracked_state brw_texture_surfaces = { .dirty = { .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH, + .brw = BRW_NEW_BATCH | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_FRAGMENT_PROGRAM, .cache = 0 }, .emit = brw_update_texture_surfaces, diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c index c3b67e85140..ea634a9be09 100644 --- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c @@ -188,13 +188,14 @@ gen7_upload_samplers(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; struct gen7_sampler_state *samplers; - int i; - brw->sampler.count = 0; - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) - brw->sampler.count = i + 1; - } + /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM */ + struct gl_program *vs = (struct gl_program *) brw->vertex_program; + struct gl_program *fs = (struct gl_program *) brw->fragment_program; + + GLbitfield SamplersUsed = vs->SamplersUsed | fs->SamplersUsed; + + brw->sampler.count = _mesa_bitcount(SamplersUsed); if (brw->sampler.count == 0) return; @@ -204,9 +205,13 @@ gen7_upload_samplers(struct brw_context *brw) 32, &brw->sampler.offset); memset(samplers, 0, brw->sampler.count * sizeof(*samplers)); - for (i = 0; i < brw->sampler.count; i++) { - if (ctx->Texture.Unit[i]._ReallyEnabled) - gen7_update_sampler_state(brw, i, i, &samplers[i]); + for (unsigned s = 0; s < brw->sampler.count; s++) { + if (SamplersUsed & (1 << s)) { + const unsigned unit = (fs->SamplersUsed & (1 << s)) ? + fs->SamplerUnits[s] : vs->SamplerUnits[s]; + if (ctx->Texture.Unit[unit]._ReallyEnabled) + gen7_update_sampler_state(brw, unit, s, &samplers[s]); + } } brw->state.dirty.cache |= CACHE_NEW_SAMPLER; @@ -215,7 +220,9 @@ gen7_upload_samplers(struct brw_context *brw) const struct brw_tracked_state gen7_samplers = { .dirty = { .mesa = _NEW_TEXTURE, - .brw = BRW_NEW_BATCH, + .brw = BRW_NEW_BATCH | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_FRAGMENT_PROGRAM, .cache = 0 }, .emit = gen7_upload_samplers, |