diff options
author | Eric Anholt <[email protected]> | 2012-11-27 14:10:52 -0800 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2012-12-05 14:29:44 -0800 |
commit | 71f06344a0d72a6bd27750ceca571fc016b8de85 (patch) | |
tree | 4a32ebc3e5bff0ad16665a5a0737b2da1c0e0683 /src/mesa/drivers/dri/i965/brw_vec4.cpp | |
parent | ef2fbf67d4bd941a9a0e1c6f8515fb4911e05c50 (diff) |
i965: Add a debug flag for counting cycles spent in each compiled shader.
This can be used for two purposes: Using hand-coded shaders to determine
per-instruction timings, or figuring out which shader to optimize in a
whole application.
Note that this doesn't cover the instructions that set up the message to
the URB/FB write -- we'd need to convert the MRF usage in these
instructions to GRFs so that our offsets/times don't overwrite our
shader outputs.
Reviewed-by: Kenneth Graunke <[email protected]> (v1)
v2: Check the timestamp reset flag in the VS, which is apparently
getting set fairly regularly in the range we watch, resulting in
negative numbers getting added to our 32-bit counter, and thus large
values added to our uint64_t.
v3: Rebase on reladdr changes, removing a new safety check that proved
impossible to satisfy. Add a comment to the AOP defs from Ken's
review, and put them in a slightly more sensible spot.
v4: Check timestamp reset in the FS as well.
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_vec4.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.cpp | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 5200daac043..dc9d9d5d1a5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -26,6 +26,7 @@ extern "C" { #include "main/macros.h" +#include "main/shaderobj.h" #include "program/prog_print.h" #include "program/prog_parameter.h" } @@ -248,6 +249,8 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst) return 2; case VS_OPCODE_SCRATCH_WRITE: return 3; + case SHADER_OPCODE_SHADER_TIME_ADD: + return 0; default: assert(!"not reached"); return inst->mlen; @@ -1039,9 +1042,109 @@ vec4_visitor::setup_payload(void) this->first_non_payload_grf = reg; } +src_reg +vec4_visitor::get_timestamp() +{ + assert(intel->gen >= 7); + + src_reg ts = src_reg(brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_TIMESTAMP, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_4, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW)); + + dst_reg dst = dst_reg(this, glsl_type::uvec4_type); + + vec4_instruction *mov = emit(MOV(dst, ts)); + /* We want to read the 3 fields we care about (mostly field 0, but also 2) + * even if it's not enabled in the dispatch. + */ + mov->force_writemask_all = true; + + return src_reg(dst); +} + +void +vec4_visitor::emit_shader_time_begin() +{ + current_annotation = "shader time start"; + shader_start_time = get_timestamp(); +} + +void +vec4_visitor::emit_shader_time_end() +{ + current_annotation = "shader time end"; + src_reg shader_end_time = get_timestamp(); + + emit_shader_time_write(ST_VS, shader_start_time, shader_end_time); +} + +void +vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type, + src_reg start, src_reg end) +{ + /* Choose an index in the buffer and set up tracking information for our + * printouts. + */ + int shader_time_index = brw->shader_time.num_entries++; + assert(shader_time_index <= brw->shader_time.max_entries); + brw->shader_time.types[shader_time_index] = type; + if (prog) { + _mesa_reference_shader_program(ctx, + &brw->shader_time.programs[shader_time_index], + prog); + } + + /* Check that there weren't any timestamp reset events (assuming these + * were the only two timestamp reads that happened). + */ + src_reg reset_end = end; + reset_end.swizzle = BRW_SWIZZLE_ZZZZ; + vec4_instruction *test = emit(AND(dst_null_d(), reset_end, src_reg(1u))); + test->conditional_mod = BRW_CONDITIONAL_Z; + + emit(IF(BRW_PREDICATE_NORMAL)); + + /* Take the current timestamp and get the delta. */ + start.negate = true; + dst_reg diff = dst_reg(this, glsl_type::uint_type); + emit(ADD(diff, start, end)); + + /* If there were no instructions between the two timestamp gets, the diff + * is 2 cycles. Remove that overhead, so I can forget about that when + * trying to determine the time taken for single instructions. + */ + emit(ADD(diff, src_reg(diff), src_reg(-2u))); + + int base_mrf = 6; + + dst_reg offset_mrf = dst_reg(MRF, base_mrf); + offset_mrf.type = BRW_REGISTER_TYPE_UD; + emit(MOV(offset_mrf, src_reg(shader_time_index * 4))); + + dst_reg time_mrf = dst_reg(MRF, base_mrf + 1); + time_mrf.type = BRW_REGISTER_TYPE_UD; + emit(MOV(time_mrf, src_reg(diff))); + + vec4_instruction *inst; + inst = emit(SHADER_OPCODE_SHADER_TIME_ADD); + inst->base_mrf = base_mrf; + inst->mlen = 2; + + emit(BRW_OPCODE_ENDIF); +} + bool vec4_visitor::run() { + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + emit_shader_time_begin(); + emit_attribute_fixups(); /* Generate VS IR for main(). (the visitor only descends into @@ -1057,6 +1160,9 @@ vec4_visitor::run() if (c->key.userclip_active && !c->key.uses_clip_distance) setup_uniform_clipplane_values(); + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + emit_shader_time_end(); + emit_urb_writes(); /* Before any optimization, push array accesses out to scratch |