diff options
author | Eric Anholt <[email protected]> | 2012-02-15 14:15:14 -0800 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2012-02-21 11:54:14 -0800 |
commit | f9c3ea32cd9b243050ee16f10d6eb9d9c8b3a8ea (patch) | |
tree | bc8d85b12d70377ce22fa3444e22e2cffec64120 /src | |
parent | 07e00b3040d6da381595c65db5afe597f20d99fc (diff) |
i965: Split the gen6 GS binding table to a separate table.
Improves VS state change microbenchmark performance by 7.08729% +/-
1.22289% (n=10) on gen7, because we don't upload the 64 dwords of
unused binding table any more.
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 23 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_misc_state.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state_upload.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen6_sol.c | 58 |
5 files changed, 75 insertions, 10 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 44a01e69ba5..9c89617e66d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -484,11 +484,6 @@ struct brw_vs_ouput_sizes {
  *    |   . |     .                   |
  *    |   : |     :                   |
  *    |  24 | Texture 15              |
- *    +-----|-------------------------+
- *    |  25 | SOL Binding 0           |
- *    |   . |     .                   |
- *    |   : |     :                   |
- *    |  88 | SOL Binding 63          |
  *    +-------------------------------+
  *
  * Our VS binding tables are programmed as follows:
@@ -502,6 +497,15 @@ struct brw_vs_ouput_sizes {
  *    |  16 | Texture 15              |
  *    +-------------------------------+
  *
+ * Our (gen6) GS binding tables are programmed as follows:
+ *
+ *    +-----+-------------------------+
+ *    |   0 | SOL Binding 0           |
+ *    |   . |     .                   |
+ *    |   : |     :                   |
+ *    |  63 | SOL Binding 63          |
+ *    +-----+-------------------------+
+ *
  * Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be
  * the identity function or things will break.  We do want to keep draw buffers
  * first so we can use headerless render target writes for RT 0.
@@ -509,15 +513,17 @@ struct brw_vs_ouput_sizes {
 #define SURF_INDEX_DRAW(d)           (d)
 #define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
 #define SURF_INDEX_TEXTURE(t)        (BRW_MAX_DRAW_BUFFERS + 2 + (t))
-#define SURF_INDEX_SOL_BINDING(t)    (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) + (t))
 
 /** Maximum size of the binding table. */
-#define BRW_MAX_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
+#define BRW_MAX_SURFACES SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT)
 
 #define SURF_INDEX_VERT_CONST_BUFFER (0)
 #define SURF_INDEX_VS_TEXTURE(t)     (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t))
 #define BRW_MAX_VS_SURFACES          SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT)
 
+#define SURF_INDEX_SOL_BINDING(t)    ((t))
+#define BRW_MAX_GS_SURFACES          SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
+
 enum brw_cache_id {
    BRW_BLEND_STATE,
    BRW_DEPTH_STENCIL_STATE,
@@ -868,6 +874,9 @@ struct brw_context
       /** Offset in the program cache to the CLIP program pre-gen6 */
       uint32_t prog_offset;
       uint32_t state_offset;
+
+      uint32_t bind_bo_offset;
+      uint32_t surf_offset[BRW_MAX_GS_SURFACES]; /* one per SOL binding */
    } gs;
 
    struct {
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 7bc7e1c1025..c86755de659 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -116,7 +116,7 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw)
              GEN6_BINDING_TABLE_MODIFY_PS |
              (4 - 2));
    OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
-   OUT_BATCH(brw->bind.bo_offset); /* gs */
+   OUT_BATCH(brw->gs.bind_bo_offset); /* gs */
    OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
    ADVANCE_BATCH();
 }
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 59a2bb32501..a58b4b3c0b8 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -90,6 +90,7 @@ extern const struct brw_tracked_state gen6_clip_vp;
 extern const struct brw_tracked_state gen6_color_calc_state;
 extern const struct brw_tracked_state gen6_depth_stencil_state;
 extern const struct brw_tracked_state gen6_gs_state;
+extern const struct brw_tracked_state gen6_gs_binding_table;
 extern const struct brw_tracked_state gen6_renderbuffer_surfaces;
 extern const struct brw_tracked_state gen6_sampler_state;
 extern const struct brw_tracked_state gen6_scissor_state;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 28e4d26209e..3f5c03d8f90 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -148,6 +148,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
    &brw_texture_surfaces,
    &gen6_sol_surface,
    &brw_vs_binding_table,
+   &gen6_gs_binding_table,
    &brw_binding_table,
 
    &brw_samplers,
diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
index 41923b7f527..fbd8e71631f 100644
--- a/src/mesa/drivers/dri/i965/gen6_sol.c
+++ b/src/mesa/drivers/dri/i965/gen6_sol.c
@@ -30,6 +30,7 @@
 #include "brw_context.h"
 #include "intel_batchbuffer.h"
 #include "brw_defines.h"
+#include "brw_state.h"
 
 static void
 gen6_update_sol_surfaces(struct brw_context *brw)
@@ -54,11 +55,11 @@ gen6_update_sol_surfaces(struct brw_context *brw)
             xfb_obj->Offset[buffer] / 4 +
             linked_xfb_info->Outputs[i].DstOffset;
          brw_update_sol_surface(
-            brw, xfb_obj->Buffers[buffer], &brw->bind.surf_offset[surf_index],
+            brw, xfb_obj->Buffers[buffer], &brw->gs.surf_offset[surf_index],
             linked_xfb_info->Outputs[i].NumComponents,
             linked_xfb_info->BufferStride[buffer], buffer_offset);
       } else {
-         brw->bind.surf_offset[surf_index] = 0;
+         brw->gs.surf_offset[surf_index] = 0;
       }
    }
 
@@ -75,6 +76,59 @@ const struct brw_tracked_state gen6_sol_surface = {
    .emit = gen6_update_sol_surfaces,
 };
 
+/**
+ * Uploads the gen6 GS binding table, which maps SOL binding indices to
+ * surface state offsets; leaves the table offset at 0 when SOL is unused.
+ */
+static void
+brw_gs_upload_binding_table(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->intel.ctx;
+   /* BRW_NEW_VERTEX_PROGRAM: may be NULL (e.g. no GLSL program bound). */
+   const struct gl_shader_program *shaderprog =
+      ctx->Shader.CurrentVertexProgram;
+   /* Currently we only ever upload surfaces for SOL. */
+   bool has_surfaces =
+      shaderprog && shaderprog->LinkedTransformFeedback.NumOutputs != 0;
+   uint32_t *bind;
+
+   /* Skip making a binding table if there are no SOL surfaces to bind.
+    * Only flag the table dirty when a previously uploaded table is being
+    * dropped.
+    */
+   if (!has_surfaces) {
+      if (brw->gs.bind_bo_offset != 0) {
+         brw->state.dirty.brw |= BRW_NEW_GS_BINDING_TABLE;
+         brw->gs.bind_bo_offset = 0;
+      }
+      return;
+   }
+
+   /* The table must hold every SOL binding that the memcpy below copies.
+    * Might want to calculate nr_surfaces first, to avoid taking up so much
+    * space for the binding table.
+    */
+   bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
+                          sizeof(uint32_t) * BRW_MAX_GS_SURFACES,
+                          32, &brw->gs.bind_bo_offset);
+
+   /* BRW_NEW_SURFACES */
+   memcpy(bind, brw->gs.surf_offset, BRW_MAX_GS_SURFACES * sizeof(uint32_t));
+
+   brw->state.dirty.brw |= BRW_NEW_GS_BINDING_TABLE;
+}
+
+const struct brw_tracked_state gen6_gs_binding_table = {
+   .dirty = {
+      .mesa = 0,
+      .brw = (BRW_NEW_BATCH |
+              BRW_NEW_VERTEX_PROGRAM |
+              BRW_NEW_SURFACES),
+      .cache = 0
+   },
+   .emit = brw_gs_upload_binding_table,
+};
+
 static void
 gen6_update_sol_indices(struct brw_context *brw)
 {