diff options
author | Eric Anholt <[email protected]> | 2013-03-11 12:59:06 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2013-03-14 12:30:39 -0700 |
commit | db3a0f13ef13b6d392dfc3b7346351533600d343 (patch) | |
tree | 125d3429874389addc521226419a20f3b539a3d0 | |
parent | a35a19a6ea7248d98282f87244138299fa130c85 (diff) |
i965: Split shader_time entries into separate cachelines.
This avoids some snooping overhead between EUs processing separate shaders
(so VS versus FS).
Improves performance of a minecraft trace with shader_time by 28.9% +/-
18.3% (n=7), and performance of my old GLSL demo by 93.7% +/- 0.8% (n=4).
v2: Add a define for the stride with a comment explaining its units and
why.
Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 8 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_program.c | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 |
4 files changed, 13 insertions, 4 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index c34d6b108a2..d042dd640c9 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -571,6 +571,14 @@ struct brw_vs_prog_data { #define SURF_INDEX_SOL_BINDING(t) ((t)) #define BRW_MAX_GS_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS) +/** + * Stride in bytes between shader_time entries. + * + * We separate entries by a cacheline to reduce traffic between EUs writing to + * different entries. + */ +#define SHADER_TIME_STRIDE 64 + enum brw_cache_id { BRW_BLEND_STATE, BRW_DEPTH_STENCIL_STATE, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 8ce39543002..8476bb51086 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -621,7 +621,7 @@ fs_visitor::emit_shader_time_write(enum shader_time_shader_type type, fs_reg offset_mrf = fs_reg(MRF, base_mrf); offset_mrf.type = BRW_REGISTER_TYPE_UD; - emit(MOV(offset_mrf, fs_reg(shader_time_index * 4))); + emit(MOV(offset_mrf, fs_reg(shader_time_index * SHADER_TIME_STRIDE))); fs_reg time_mrf = fs_reg(MRF, base_mrf + 1); time_mrf.type = BRW_REGISTER_TYPE_UD; diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 75eb6bc6689..62954d3cf78 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -228,7 +228,8 @@ brw_init_shader_time(struct brw_context *brw) const int max_entries = 4096; brw->shader_time.bo = drm_intel_bo_alloc(intel->bufmgr, "shader time", - max_entries * 4, 4096); + max_entries * SHADER_TIME_STRIDE, + 4096); brw->shader_time.programs = rzalloc_array(brw, struct gl_shader_program *, max_entries); brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type, @@ -409,7 +410,7 @@ brw_collect_shader_time(struct brw_context *brw) uint32_t *times = brw->shader_time.bo->virtual; for (int i = 0; i < brw->shader_time.num_entries; i++) { - brw->shader_time.cumulative[i] += times[i]; + brw->shader_time.cumulative[i] += times[i * SHADER_TIME_STRIDE / 4]; } /* Zero the BO out to clear it out for our next collection. diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index f319f32c2cc..d759710af3c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1225,7 +1225,7 @@ vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type, dst_reg offset_mrf = dst_reg(MRF, base_mrf); offset_mrf.type = BRW_REGISTER_TYPE_UD; - emit(MOV(offset_mrf, src_reg(shader_time_index * 4))); + emit(MOV(offset_mrf, src_reg(shader_time_index * SHADER_TIME_STRIDE))); dst_reg time_mrf = dst_reg(MRF, base_mrf + 1); time_mrf.type = BRW_REGISTER_TYPE_UD; |