From decc708c7c3ab53922cf3ac94cd74231196fd0cb Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 21 Aug 2012 23:54:19 -0700 Subject: i965: Upload separate per-stage sampler state tables. Also upload separate sampler default/texture border color entries. At the moment, this is completely idiotic: both tables contain exactly the same contents, so we're simply wasting batch space and CPU time. However, soon we'll only upload data for textures actually /used/ in a particular stage, which will usually make the VS table empty and very likely eliminate all redundancy. This is just a stepping stone. Signed-off-by: Kenneth Graunke Reviewed-by: Paul Berry --- src/mesa/drivers/dri/i965/brw_context.h | 21 ++++++++------ src/mesa/drivers/dri/i965/brw_vs_state.c | 4 +-- src/mesa/drivers/dri/i965/brw_wm_sampler_state.c | 36 +++++++++++++++++------- src/mesa/drivers/dri/i965/brw_wm_state.c | 8 +++--- src/mesa/drivers/dri/i965/gen6_sampler_state.c | 4 +-- src/mesa/drivers/dri/i965/gen7_sampler_state.c | 33 ++++++++++++++++------ src/mesa/drivers/dri/i965/gen7_vs_state.c | 2 +- src/mesa/drivers/dri/i965/gen7_wm_state.c | 2 +- 8 files changed, 72 insertions(+), 38 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index d7c3472fa2e..1715c56aa72 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1064,11 +1064,6 @@ struct brw_context GLuint last_bufsz; } curbe; - /** SAMPLER_STATE count and offset */ - struct { - uint32_t offset; - } sampler; - /** * Layout of vertex data exiting the geometry portion of the pipleine. * This comes from the geometry shader if one exists, otherwise from the @@ -1110,7 +1105,13 @@ struct brw_context uint32_t bind_bo_offset; uint32_t surf_offset[BRW_MAX_VS_SURFACES]; + /** SAMPLER_STATE count and table offset */ uint32_t sampler_count; + uint32_t sampler_offset; + + /** Offsets in the batch to sampler default colors (texture border color) + */ + uint32_t sdc_offset[BRW_MAX_TEX_UNIT]; } vs; struct { @@ -1153,10 +1154,6 @@ struct brw_context struct { struct brw_wm_prog_data *prog_data; - /** offsets in the batch to sampler default colors (texture border color) - */ - uint32_t sdc_offset[BRW_MAX_TEX_UNIT]; - GLuint render_surf; drm_intel_bo *scratch_bo; @@ -1184,7 +1181,13 @@ struct brw_context uint32_t bind_bo_offset; uint32_t surf_offset[BRW_MAX_WM_SURFACES]; + /** SAMPLER_STATE count and table offset */ uint32_t sampler_count; + uint32_t sampler_offset; + + /** Offsets in the batch to sampler default colors (texture border color) + */ + uint32_t sdc_offset[BRW_MAX_TEX_UNIT]; struct { struct ra_regs *regs; diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 13aabac43d9..a8729df336c 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -157,12 +157,12 @@ brw_upload_vs_unit(struct brw_context *brw) */ if (brw->vs.sampler_count) { vs->vs5.sampler_state_pointer = - (brw->batch.bo->offset + brw->sampler.offset) >> 5; + (brw->batch.bo->offset + brw->vs.sampler_offset) >> 5; drm_intel_bo_emit_reloc(brw->batch.bo, brw->vs.state_offset + offsetof(struct brw_vs_unit_state, vs5), brw->batch.bo, - brw->sampler.offset | vs->vs5.sampler_count, + brw->vs.sampler_offset | vs->vs5.sampler_count, I915_GEM_DOMAIN_INSTRUCTION, 0); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index ad788644a01..e2b4b8dad36 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -195,6 +195,7 @@ static void brw_update_sampler_state(struct brw_context *brw, int unit, int ss_index, struct brw_sampler_state *sampler, + uint32_t sampler_state_table_offset, uint32_t *sdc_offset) { struct gl_context *ctx = &brw->ctx; @@ -347,7 +348,7 @@ static void brw_update_sampler_state(struct brw_context *brw, *sdc_offset) >> 5; drm_intel_bo_emit_reloc(brw->batch.bo, - brw->sampler.offset + + sampler_state_table_offset + ss_index * sizeof(struct brw_sampler_state) + offsetof(struct brw_sampler_state, ss2), brw->batch.bo, *sdc_offset, @@ -366,7 +367,10 @@ static void brw_update_sampler_state(struct brw_context *brw, static void -brw_upload_samplers(struct brw_context *brw) +brw_upload_sampler_state_table(struct brw_context *brw, + uint32_t *sampler_count, + uint32_t *sst_offset, + uint32_t *sdc_offset) { struct gl_context *ctx = &brw->ctx; struct brw_sampler_state *samplers; @@ -380,17 +384,15 @@ brw_upload_samplers(struct brw_context *brw) /* ARB programs use the texture unit number as the sampler index, so we * need to find the highest unit used. A bit-count will not work. */ - brw->wm.sampler_count = _mesa_fls(SamplersUsed); - /* Currently we only use one sampler state table. Mirror the count. */ - brw->vs.sampler_count = brw->wm.sampler_count; + *sampler_count = _mesa_fls(SamplersUsed); - if (brw->wm.sampler_count == 0) + if (*sampler_count == 0) return; samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, - brw->wm.sampler_count * sizeof(*samplers), - 32, &brw->sampler.offset); - memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers)); + *sampler_count * sizeof(*samplers), + 32, sst_offset); + memset(samplers, 0, *sampler_count * sizeof(*samplers)); for (unsigned s = 0; s < brw->wm.sampler_count; s++) { if (SamplersUsed & (1 << s)) { @@ -398,13 +400,27 @@ brw_upload_samplers(struct brw_context *brw) fs->SamplerUnits[s] : vs->SamplerUnits[s]; if (ctx->Texture.Unit[unit]._ReallyEnabled) brw_update_sampler_state(brw, unit, s, &samplers[s], - &brw->wm.sdc_offset[s]); + *sst_offset, &sdc_offset[s]); } } brw->state.dirty.cache |= CACHE_NEW_SAMPLER; } +static void +brw_upload_samplers(struct brw_context *brw) +{ + brw_upload_sampler_state_table(brw, + &brw->wm.sampler_count, + &brw->wm.sampler_offset, + brw->wm.sdc_offset); + + brw_upload_sampler_state_table(brw, + &brw->vs.sampler_count, + &brw->vs.sampler_offset, + brw->vs.sdc_offset); +} + const struct brw_tracked_state brw_samplers = { .dirty = { .mesa = _NEW_TEXTURE, diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 106d6287bf2..404fdadbe63 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -150,7 +150,7 @@ brw_upload_wm_unit(struct brw_context *brw) if (brw->wm.sampler_count) { /* reloc */ wm->wm4.sampler_state_pointer = (brw->batch.bo->offset + - brw->sampler.offset) >> 5; + brw->wm.sampler_offset) >> 5; } else { wm->wm4.sampler_state_pointer = 0; } @@ -229,9 +229,9 @@ brw_upload_wm_unit(struct brw_context *brw) drm_intel_bo_emit_reloc(brw->batch.bo, brw->wm.state_offset + offsetof(struct brw_wm_unit_state, wm4), - brw->batch.bo, (brw->sampler.offset | - wm->wm4.stats_enable | - (wm->wm4.sampler_count << 2)), + brw->batch.bo, (brw->wm.sampler_offset | + wm->wm4.stats_enable | + (wm->wm4.sampler_count << 2)), I915_GEM_DOMAIN_INSTRUCTION, 0); } diff --git a/src/mesa/drivers/dri/i965/gen6_sampler_state.c b/src/mesa/drivers/dri/i965/gen6_sampler_state.c index 0cc2a4566e1..16be8a79bfb 100644 --- a/src/mesa/drivers/dri/i965/gen6_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sampler_state.c @@ -39,9 +39,9 @@ upload_sampler_state_pointers(struct brw_context *brw) GS_SAMPLER_STATE_CHANGE | PS_SAMPLER_STATE_CHANGE | (4 - 2)); - OUT_BATCH(brw->sampler.offset); /* VS */ + OUT_BATCH(brw->vs.sampler_offset); /* VS */ OUT_BATCH(0); /* GS */ - OUT_BATCH(brw->sampler.offset); + OUT_BATCH(brw->wm.sampler_offset); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c index dd2493c2e54..45bee7850c2 100644 --- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c @@ -185,7 +185,10 @@ gen7_update_sampler_state(struct brw_context *brw, int unit, int ss_index, static void -gen7_upload_samplers(struct brw_context *brw) +gen7_upload_sampler_state_table(struct brw_context *brw, + uint32_t *sampler_count, + uint32_t *sst_offset, + uint32_t *sdc_offset) { struct gl_context *ctx = &brw->ctx; struct gen7_sampler_state *samplers; @@ -196,17 +199,15 @@ gen7_upload_samplers(struct brw_context *brw) GLbitfield SamplersUsed = vs->SamplersUsed | fs->SamplersUsed; - brw->wm.sampler_count = _mesa_fls(SamplersUsed); - /* Currently we only use one sampler state table. Mirror the count. */ - brw->vs.sampler_count = brw->wm.sampler_count; + *sampler_count = _mesa_fls(SamplersUsed); - if (brw->wm.sampler_count == 0) + if (*sampler_count == 0) return; samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, - brw->wm.sampler_count * sizeof(*samplers), - 32, &brw->sampler.offset); - memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers)); + *sampler_count * sizeof(*samplers), + 32, sst_offset); + memset(samplers, 0, *sampler_count * sizeof(*samplers)); for (unsigned s = 0; s < brw->wm.sampler_count; s++) { if (SamplersUsed & (1 << s)) { @@ -214,13 +215,27 @@ gen7_upload_samplers(struct brw_context *brw) fs->SamplerUnits[s] : vs->SamplerUnits[s]; if (ctx->Texture.Unit[unit]._ReallyEnabled) gen7_update_sampler_state(brw, unit, s, &samplers[s], - &brw->wm.sdc_offset[s]); + &sdc_offset[s]); } } brw->state.dirty.cache |= CACHE_NEW_SAMPLER; } +static void +gen7_upload_samplers(struct brw_context *brw) +{ + gen7_upload_sampler_state_table(brw, + &brw->wm.sampler_count, + &brw->wm.sampler_offset, + brw->wm.sdc_offset); + + gen7_upload_sampler_state_table(brw, + &brw->vs.sampler_count, + &brw->vs.sampler_offset, + brw->vs.sdc_offset); +} + const struct brw_tracked_state gen7_samplers = { .dirty = { .mesa = _NEW_TEXTURE, diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c index 634bd95a873..7a6ba59f415 100644 --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c @@ -48,7 +48,7 @@ upload_vs_state(struct brw_context *brw) /* CACHE_NEW_SAMPLER */ BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2)); - OUT_BATCH(brw->sampler.offset); + OUT_BATCH(brw->vs.sampler_offset); ADVANCE_BATCH(); if (brw->vs.push_const_size == 0) { diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c index d079a52f315..ba7a53d8705 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c @@ -125,7 +125,7 @@ upload_ps_state(struct brw_context *brw) /* CACHE_NEW_SAMPLER */ BEGIN_BATCH(2); OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2)); - OUT_BATCH(brw->sampler.offset); + OUT_BATCH(brw->wm.sampler_offset); ADVANCE_BATCH(); /* CACHE_NEW_WM_PROG */ -- cgit v1.2.3