aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_eu_emit.c
diff options
context:
space:
mode:
authorEric Anholt <[email protected]>2012-11-27 14:10:52 -0800
committerEric Anholt <[email protected]>2012-12-05 14:29:44 -0800
commit71f06344a0d72a6bd27750ceca571fc016b8de85 (patch)
tree4a32ebc3e5bff0ad16665a5a0737b2da1c0e0683 /src/mesa/drivers/dri/i965/brw_eu_emit.c
parentef2fbf67d4bd941a9a0e1c6f8515fb4911e05c50 (diff)
i965: Add a debug flag for counting cycles spent in each compiled shader.
This can be used for two purposes: using hand-coded shaders to determine per-instruction timings, or figuring out which shader to optimize in a whole application.

Note that this doesn't cover the instructions that set up the message to the URB/FB write -- we'd need to convert the MRF usage in these instructions to GRFs so that our offsets/times don't overwrite our shader outputs.

Reviewed-by: Kenneth Graunke <[email protected]> (v1)

v2: Check the timestamp reset flag in the VS, which is apparently getting set fairly regularly in the range we watch, resulting in negative numbers getting added to our 32-bit counter, and thus large values added to our uint64_t.
v3: Rebase on reladdr changes, removing a new safety check that proved impossible to satisfy. Add a comment to the AOP defs from Ken's review, and put them in a slightly more sensible spot.
v4: Check timestamp reset in the FS as well.
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_eu_emit.c')
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c56
1 files changed, 54 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 8a629ff0b40..fb1255f728c 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -253,7 +253,6 @@ brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
assert(!reg.negate);
assert(!reg.abs);
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
- assert(reg.vstride != BRW_VERTICAL_STRIDE_0);
}
validate_reg(insn, reg);
@@ -332,7 +331,8 @@ void brw_set_src1(struct brw_compile *p,
{
assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
- assert(reg.nr < 128);
+ if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
+ assert(reg.nr < 128);
gen7_convert_mrf_to_grf(p, &reg);
@@ -2448,3 +2448,55 @@ brw_svb_write(struct brw_compile *p,
0, /* end_of_thread */
send_commit_msg); /* send_commit_msg */
}
+
+/**
+ * Emit the SEND that performs an untyped atomic add for shader-time counting.
+ *
+ * Both the VS and FS stages generate this as a single-channel align1
+ * instruction when INTEL_DEBUG=shader_time is in use.
+ *
+ * The FS can't use the typed atomic op, since that ANDs the execution mask
+ * with the pixel mask while we want one dword written for all the pixels.
+ * The VS skips the SIMD4x2 atomic ops because we want to write just one u32,
+ * so it uses the same untyped atomic write message as the pixel shader.
+ *
+ * Untyped atomics require a BUFFER surface type with RAW format and are only
+ * accessible through the legacy DATA_CACHE dataport messages.
+ *
+ * base_mrf is the MRF handed to the SEND as src0; the message is two
+ * registers long (offset, value).  surf_index is ORed into the low bits of
+ * the message descriptor.
+ */
+void brw_shader_time_add(struct brw_compile *p,
+                         int base_mrf,
+                         uint32_t surf_index)
+{
+   assert(p->brw->intel.gen >= 7);
+
+   /* Switch to align1 with the mask disabled while the SEND is allocated,
+    * then restore the previous instruction state.
+    */
+   brw_push_insn_state(p);
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_pop_insn_state(p);
+
+   /* vec1 registers and an unmasked instruction: the given offset must be
+    * incremented only once.
+    */
+   brw_set_dest(p, insn,
+                brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0));
+   brw_set_src0(p, insn, brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, base_mrf, 0));
+
+   brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
+                              2,      /* mlen: offset, value */
+                              0,      /* rlen */
+                              false,  /* header_present */
+                              false); /* eot */
+
+   /* Bit 13 (return data) is deliberately left clear: no return data. */
+   insn->bits3.ud |= (6 << 14) |          /* untyped atomic op */
+                     (1 << 12) |          /* SIMD8 mode */
+                     (BRW_AOP_ADD << 8) | surf_index;
+}