diff options
author | Eric Anholt <[email protected]> | 2012-11-27 14:10:52 -0800 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2012-12-05 14:29:44 -0800 |
commit | 71f06344a0d72a6bd27750ceca571fc016b8de85 (patch) | |
tree | 4a32ebc3e5bff0ad16665a5a0737b2da1c0e0683 /src/mesa/drivers/dri/i965/brw_eu_emit.c | |
parent | ef2fbf67d4bd941a9a0e1c6f8515fb4911e05c50 (diff) |
i965: Add a debug flag for counting cycles spent in each compiled shader.
This can be used for two purposes: Using hand-coded shaders to determine
per-instruction timings, or figuring out which shader to optimize in a
whole application.
Note that this doesn't cover the instructions that set up the message to
the URB/FB write -- we'd need to convert the MRF usage in these
instructions to GRFs so that our offsets/times don't overwrite our
shader outputs.
Reviewed-by: Kenneth Graunke <[email protected]> (v1)
v2: Check the timestamp reset flag in the VS, which is apparently
getting set fairly regularly in the range we watch, resulting in
negative numbers getting added to our 32-bit counter, and thus large
values added to our uint64_t.
v3: Rebase on reladdr changes, removing a new safety check that proved
impossible to satisfy. Add a comment to the AOP defs from Ken's
review, and put them in a slightly more sensible spot.
v4: Check timestamp reset in the FS as well.
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_eu_emit.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_emit.c | 56 |
1 files changed, 54 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 8a629ff0b40..fb1255f728c 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -253,7 +253,6 @@ brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, assert(!reg.negate); assert(!reg.abs); assert(reg.address_mode == BRW_ADDRESS_DIRECT); - assert(reg.vstride != BRW_VERTICAL_STRIDE_0); } validate_reg(insn, reg); @@ -332,7 +331,8 @@ void brw_set_src1(struct brw_compile *p, { assert(reg.file != BRW_MESSAGE_REGISTER_FILE); - assert(reg.nr < 128); + if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(reg.nr < 128); gen7_convert_mrf_to_grf(p, &reg); @@ -2448,3 +2448,55 @@ brw_svb_write(struct brw_compile *p, 0, /* end_of_thread */ send_commit_msg); /* send_commit_msg */ } + +/** + * This instruction is generated as a single-channel align1 instruction by + * both the VS and FS stages when using INTEL_DEBUG=shader_time. + * + * We can't use the typed atomic op in the FS because that has the execution + * mask ANDed with the pixel mask, but we just want to write the one dword for + * all the pixels. + * + * We don't use the SIMD4x2 atomic ops in the VS because want to just write + * one u32. So we use the same untyped atomic write message as the pixel + * shader. + * + * The untyped atomic operation requires a BUFFER surface type with RAW + * format, and is only accessible through the legacy DATA_CACHE dataport + * messages. + */ +void brw_shader_time_add(struct brw_compile *p, + int base_mrf, + uint32_t surf_index) +{ + struct intel_context *intel = &p->brw->intel; + assert(intel->gen >= 7); + + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_set_mask_control(p, BRW_MASK_DISABLE); + struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); + brw_pop_insn_state(p); + + /* We use brw_vec1_reg and unmasked because we want to increment the given + * offset only once. 
+ */ + brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NULL, 0)); + brw_set_src0(p, send, brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + base_mrf, 0)); + + bool header_present = false; + bool eot = false; + uint32_t mlen = 2; /* offset, value */ + uint32_t rlen = 0; + brw_set_message_descriptor(p, send, + GEN7_SFID_DATAPORT_DATA_CACHE, + mlen, rlen, header_present, eot); + + send->bits3.ud |= 6 << 14; /* untyped atomic op */ + send->bits3.ud |= 0 << 13; /* no return data */ + send->bits3.ud |= 1 << 12; /* SIMD8 mode */ + send->bits3.ud |= BRW_AOP_ADD << 8; + send->bits3.ud |= surf_index << 0; +} |