aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_vec4.cpp
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2012-11-27 14:10:52 -0800
committerEric Anholt <[email protected]>2012-12-05 14:29:44 -0800
commit71f06344a0d72a6bd27750ceca571fc016b8de85 (patch)
tree4a32ebc3e5bff0ad16665a5a0737b2da1c0e0683 /src/mesa/drivers/dri/i965/brw_vec4.cpp
parentef2fbf67d4bd941a9a0e1c6f8515fb4911e05c50 (diff)
i965: Add a debug flag for counting cycles spent in each compiled shader.
This can be used for two purposes: Using hand-coded shaders to determine per-instruction timings, or figuring out which shader to optimize in a whole application. Note that this doesn't cover the instructions that set up the message to the URB/FB write -- we'd need to convert the MRF usage in these instructions to GRFs so that our offsets/times don't overwrite our shader outputs. Reviewed-by: Kenneth Graunke <[email protected]> (v1) v2: Check the timestamp reset flag in the VS, which is apparently getting set fairly regularly in the range we watch, resulting in negative numbers getting added to our 32-bit counter, and thus large values added to our uint64_t. v3: Rebase on reladdr changes, removing a new safety check that proved impossible to satisfy. Add a comment to the AOP defs from Ken's review, and put them in a slightly more sensible spot. v4: Check timestamp reset in the FS as well.
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_vec4.cpp')
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.cpp106
1 files changed, 106 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 5200daac043..dc9d9d5d1a5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -26,6 +26,7 @@
extern "C" {
#include "main/macros.h"
+#include "main/shaderobj.h"
#include "program/prog_print.h"
#include "program/prog_parameter.h"
}
@@ -248,6 +249,8 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
return 2;
case VS_OPCODE_SCRATCH_WRITE:
return 3;
+ case SHADER_OPCODE_SHADER_TIME_ADD:
+ return 0;
default:
assert(!"not reached");
return inst->mlen;
@@ -1039,9 +1042,109 @@ vec4_visitor::setup_payload(void)
this->first_non_payload_grf = reg;
}
+src_reg
+vec4_visitor::get_timestamp()
+{
+ assert(intel->gen >= 7);
+
+ src_reg ts = src_reg(brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_TIMESTAMP,
+ 0,
+ BRW_REGISTER_TYPE_UD,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_4,
+ BRW_SWIZZLE_XYZW,
+ WRITEMASK_XYZW));
+
+ dst_reg dst = dst_reg(this, glsl_type::uvec4_type);
+
+ vec4_instruction *mov = emit(MOV(dst, ts));
+ /* We want to read the 3 fields we care about (mostly field 0, but also 2)
+ * even if it's not enabled in the dispatch.
+ */
+ mov->force_writemask_all = true;
+
+ return src_reg(dst);
+}
+
+void
+vec4_visitor::emit_shader_time_begin()
+{
+ current_annotation = "shader time start";
+ shader_start_time = get_timestamp();
+}
+
+void
+vec4_visitor::emit_shader_time_end()
+{
+ current_annotation = "shader time end";
+ src_reg shader_end_time = get_timestamp();
+
+ emit_shader_time_write(ST_VS, shader_start_time, shader_end_time);
+}
+
+void
+vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type,
+ src_reg start, src_reg end)
+{
+ /* Choose an index in the buffer and set up tracking information for our
+ * printouts.
+ */
+ int shader_time_index = brw->shader_time.num_entries++;
+ assert(shader_time_index <= brw->shader_time.max_entries);
+ brw->shader_time.types[shader_time_index] = type;
+ if (prog) {
+ _mesa_reference_shader_program(ctx,
+ &brw->shader_time.programs[shader_time_index],
+ prog);
+ }
+
+ /* Check that there weren't any timestamp reset events (assuming these
+ * were the only two timestamp reads that happened).
+ */
+ src_reg reset_end = end;
+ reset_end.swizzle = BRW_SWIZZLE_ZZZZ;
+ vec4_instruction *test = emit(AND(dst_null_d(), reset_end, src_reg(1u)));
+ test->conditional_mod = BRW_CONDITIONAL_Z;
+
+ emit(IF(BRW_PREDICATE_NORMAL));
+
+ /* Take the current timestamp and get the delta. */
+ start.negate = true;
+ dst_reg diff = dst_reg(this, glsl_type::uint_type);
+ emit(ADD(diff, start, end));
+
+ /* If there were no instructions between the two timestamp gets, the diff
+ * is 2 cycles. Remove that overhead, so I can forget about that when
+ * trying to determine the time taken for single instructions.
+ */
+ emit(ADD(diff, src_reg(diff), src_reg(-2u)));
+
+ int base_mrf = 6;
+
+ dst_reg offset_mrf = dst_reg(MRF, base_mrf);
+ offset_mrf.type = BRW_REGISTER_TYPE_UD;
+ emit(MOV(offset_mrf, src_reg(shader_time_index * 4)));
+
+ dst_reg time_mrf = dst_reg(MRF, base_mrf + 1);
+ time_mrf.type = BRW_REGISTER_TYPE_UD;
+ emit(MOV(time_mrf, src_reg(diff)));
+
+ vec4_instruction *inst;
+ inst = emit(SHADER_OPCODE_SHADER_TIME_ADD);
+ inst->base_mrf = base_mrf;
+ inst->mlen = 2;
+
+ emit(BRW_OPCODE_ENDIF);
+}
+
bool
vec4_visitor::run()
{
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ emit_shader_time_begin();
+
emit_attribute_fixups();
/* Generate VS IR for main(). (the visitor only descends into
@@ -1057,6 +1160,9 @@ vec4_visitor::run()
if (c->key.userclip_active && !c->key.uses_clip_distance)
setup_uniform_clipplane_values();
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ emit_shader_time_end();
+
emit_urb_writes();
/* Before any optimization, push array accesses out to scratch