diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/intel/perf/gen_perf.h | 125 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_performance_query.c | 155 |
2 files changed, 141 insertions, 139 deletions
diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h index 2a8d9310f22..cb4d0d2da6c 100644 --- a/src/intel/perf/gen_perf.h +++ b/src/intel/perf/gen_perf.h @@ -31,6 +31,7 @@ #include <sys/sysmacros.h> #include "util/hash_table.h" +#include "compiler/glsl/list.h" #include "util/ralloc.h" struct gen_device_info; @@ -91,6 +92,9 @@ struct gen_pipeline_stat { #define STATS_BO_END_OFFSET_BYTES (STATS_BO_SIZE / 2) #define MAX_STAT_COUNTERS (STATS_BO_END_OFFSET_BYTES / 8) +#define I915_PERF_OA_SAMPLE_SIZE (8 + /* drm_i915_perf_record_header */ \ + 256) /* OA counter report */ + struct gen_perf_query_result { /** * Storage for the final accumulated OA counters. @@ -224,6 +228,127 @@ struct gen_perf_config { } vtbl; }; +/** + * Periodic OA samples are read() into these buffer structures via the + * i915 perf kernel interface and appended to the + * brw->perfquery.sample_buffers linked list. When we process the + * results of an OA metrics query we need to consider all the periodic + * samples between the Begin and End MI_REPORT_PERF_COUNT command + * markers. + * + * 'Periodic' is a simplification as there are other automatic reports + * written by the hardware also buffered here. + * + * Considering three queries, A, B and C: + * + * Time ----> + * ________________A_________________ + * | | + * | ________B_________ _____C___________ + * | | | | | | + * + * And an illustration of sample buffers read over this time frame: + * [HEAD ][ ][ ][ ][ ][ ][ ][ ][TAIL ] + * + * These nodes may hold samples for query A: + * [ ][ ][ A ][ A ][ A ][ A ][ A ][ ][ ] + * + * These nodes may hold samples for query B: + * [ ][ ][ B ][ B ][ B ][ ][ ][ ][ ] + * + * These nodes may hold samples for query C: + * [ ][ ][ ][ ][ ][ C ][ C ][ C ][ ] + * + * The illustration assumes we have an even distribution of periodic + * samples so all nodes have the same size plotted against time: + * + * Note, to simplify code, the list is never empty. + * + * With overlapping queries we can see that periodic OA reports may + * relate to multiple queries and care needs to be take to keep + * track of sample buffers until there are no queries that might + * depend on their contents. + * + * We use a node ref counting system where a reference ensures that a + * node and all following nodes can't be freed/recycled until the + * reference drops to zero. + * + * E.g. with a ref of one here: + * [ 0 ][ 0 ][ 1 ][ 0 ][ 0 ][ 0 ][ 0 ][ 0 ][ 0 ] + * + * These nodes could be freed or recycled ("reaped"): + * [ 0 ][ 0 ] + * + * These must be preserved until the leading ref drops to zero: + * [ 1 ][ 0 ][ 0 ][ 0 ][ 0 ][ 0 ][ 0 ] + * + * When a query starts we take a reference on the current tail of + * the list, knowing that no already-buffered samples can possibly + * relate to the newly-started query. A pointer to this node is + * also saved in the query object's ->oa.samples_head. + * + * E.g. starting query A while there are two nodes in .sample_buffers: + * ________________A________ + * | + * + * [ 0 ][ 1 ] + * ^_______ Add a reference and store pointer to node in + * A->oa.samples_head + * + * Moving forward to when the B query starts with no new buffer nodes: + * (for reference, i915 perf reads() are only done when queries finish) + * ________________A_______ + * | ________B___ + * | | + * + * [ 0 ][ 2 ] + * ^_______ Add a reference and store pointer to + * node in B->oa.samples_head + * + * Once a query is finished, after an OA query has become 'Ready', + * once the End OA report has landed and after we we have processed + * all the intermediate periodic samples then we drop the + * ->oa.samples_head reference we took at the start. + * + * So when the B query has finished we have: + * ________________A________ + * | ______B___________ + * | | | + * [ 0 ][ 1 ][ 0 ][ 0 ][ 0 ] + * ^_______ Drop B->oa.samples_head reference + * + * We still can't free these due to the A->oa.samples_head ref: + * [ 1 ][ 0 ][ 0 ][ 0 ] + * + * When the A query finishes: (note there's a new ref for C's samples_head) + * ________________A_________________ + * | | + * | _____C_________ + * | | | + * [ 0 ][ 0 ][ 0 ][ 0 ][ 1 ][ 0 ][ 0 ] + * ^_______ Drop A->oa.samples_head reference + * + * And we can now reap these nodes up to the C->oa.samples_head: + * [ X ][ X ][ X ][ X ] + * keeping -> [ 1 ][ 0 ][ 0 ] + * + * We reap old sample buffers each time we finish processing an OA + * query by iterating the sample_buffers list from the head until we + * find a referenced node and stop. + * + * Reaped buffers move to a perfquery.free_sample_buffers list and + * when we come to read() we first look to recycle a buffer from the + * free_sample_buffers list before allocating a new buffer. + */ +struct oa_sample_buf { + struct exec_node link; + int refcount; + int len; + uint8_t buf[I915_PERF_OA_SAMPLE_SIZE * 10]; + uint32_t last_timestamp; +}; + + static inline size_t gen_perf_query_counter_get_size(const struct gen_perf_query_counter *counter) { diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 6f7ab86d42b..0ecdac48253 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -86,129 +86,6 @@ #define OAREPORT_REASON_CTX_SWITCH (1<<3) #define OAREPORT_REASON_GO_TRANSITION (1<<4) -#define I915_PERF_OA_SAMPLE_SIZE (8 + /* drm_i915_perf_record_header */ \ - 256) /* OA counter report */ - -/** - * Periodic OA samples are read() into these buffer structures via the - * i915 perf kernel interface and appended to the - * brw->perfquery.sample_buffers linked list. When we process the - * results of an OA metrics query we need to consider all the periodic - * samples between the Begin and End MI_REPORT_PERF_COUNT command - * markers. - * - * 'Periodic' is a simplification as there are other automatic reports - * written by the hardware also buffered here. - * - * Considering three queries, A, B and C: - * - * Time ----> - * ________________A_________________ - * | | - * | ________B_________ _____C___________ - * | | | | | | - * - * And an illustration of sample buffers read over this time frame: - * [HEAD ][ ][ ][ ][ ][ ][ ][ ][TAIL ] - * - * These nodes may hold samples for query A: - * [ ][ ][ A ][ A ][ A ][ A ][ A ][ ][ ] - * - * These nodes may hold samples for query B: - * [ ][ ][ B ][ B ][ B ][ ][ ][ ][ ] - * - * These nodes may hold samples for query C: - * [ ][ ][ ][ ][ ][ C ][ C ][ C ][ ] - * - * The illustration assumes we have an even distribution of periodic - * samples so all nodes have the same size plotted against time: - * - * Note, to simplify code, the list is never empty. - * - * With overlapping queries we can see that periodic OA reports may - * relate to multiple queries and care needs to be take to keep - * track of sample buffers until there are no queries that might - * depend on their contents. - * - * We use a node ref counting system where a reference ensures that a - * node and all following nodes can't be freed/recycled until the - * reference drops to zero. - * - * E.g. with a ref of one here: - * [ 0 ][ 0 ][ 1 ][ 0 ][ 0 ][ 0 ][ 0 ][ 0 ][ 0 ] - * - * These nodes could be freed or recycled ("reaped"): - * [ 0 ][ 0 ] - * - * These must be preserved until the leading ref drops to zero: - * [ 1 ][ 0 ][ 0 ][ 0 ][ 0 ][ 0 ][ 0 ] - * - * When a query starts we take a reference on the current tail of - * the list, knowing that no already-buffered samples can possibly - * relate to the newly-started query. A pointer to this node is - * also saved in the query object's ->oa.samples_head. - * - * E.g. starting query A while there are two nodes in .sample_buffers: - * ________________A________ - * | - * - * [ 0 ][ 1 ] - * ^_______ Add a reference and store pointer to node in - * A->oa.samples_head - * - * Moving forward to when the B query starts with no new buffer nodes: - * (for reference, i915 perf reads() are only done when queries finish) - * ________________A_______ - * | ________B___ - * | | - * - * [ 0 ][ 2 ] - * ^_______ Add a reference and store pointer to - * node in B->oa.samples_head - * - * Once a query is finished, after an OA query has become 'Ready', - * once the End OA report has landed and after we we have processed - * all the intermediate periodic samples then we drop the - * ->oa.samples_head reference we took at the start. - * - * So when the B query has finished we have: - * ________________A________ - * | ______B___________ - * | | | - * [ 0 ][ 1 ][ 0 ][ 0 ][ 0 ] - * ^_______ Drop B->oa.samples_head reference - * - * We still can't free these due to the A->oa.samples_head ref: - * [ 1 ][ 0 ][ 0 ][ 0 ] - * - * When the A query finishes: (note there's a new ref for C's samples_head) - * ________________A_________________ - * | | - * | _____C_________ - * | | | - * [ 0 ][ 0 ][ 0 ][ 0 ][ 1 ][ 0 ][ 0 ] - * ^_______ Drop A->oa.samples_head reference - * - * And we can now reap these nodes up to the C->oa.samples_head: - * [ X ][ X ][ X ][ X ] - * keeping -> [ 1 ][ 0 ][ 0 ] - * - * We reap old sample buffers each time we finish processing an OA - * query by iterating the sample_buffers list from the head until we - * find a referenced node and stop. - * - * Reaped buffers move to a perfquery.free_sample_buffers list and - * when we come to read() we first look to recycle a buffer from the - * free_sample_buffers list before allocating a new buffer. - */ -struct brw_oa_sample_buf { - struct exec_node link; - int refcount; - int len; - uint8_t buf[I915_PERF_OA_SAMPLE_SIZE * 10]; - uint32_t last_timestamp; -}; - /** Downcasting convenience macro. */ static inline struct brw_perf_query_object * brw_perf_query(struct gl_perf_query_object *o) @@ -304,14 +181,14 @@ dump_perf_queries(struct brw_context *brw) /******************************************************************************/ -static struct brw_oa_sample_buf * +static struct oa_sample_buf * get_free_sample_buf(struct brw_context *brw) { struct exec_node *node = exec_list_pop_head(&brw->perfquery.free_sample_buffers); - struct brw_oa_sample_buf *buf; + struct oa_sample_buf *buf; if (node) - buf = exec_node_data(struct brw_oa_sample_buf, node, link); + buf = exec_node_data(struct oa_sample_buf, node, link); else { buf = ralloc_size(brw, sizeof(*buf)); @@ -328,15 +205,15 @@ reap_old_sample_buffers(struct brw_context *brw) { struct exec_node *tail_node = exec_list_get_tail(&brw->perfquery.sample_buffers); - struct brw_oa_sample_buf *tail_buf = - exec_node_data(struct brw_oa_sample_buf, tail_node, link); + struct oa_sample_buf *tail_buf = + exec_node_data(struct oa_sample_buf, tail_node, link); /* Remove all old, unreferenced sample buffers walking forward from * the head of the list, except always leave at least one node in * the list so we always have a node to reference when we Begin * a new query. */ - foreach_list_typed_safe(struct brw_oa_sample_buf, buf, link, + foreach_list_typed_safe(struct oa_sample_buf, buf, link, &brw->perfquery.sample_buffers) { if (buf->refcount == 0 && buf != tail_buf) { @@ -350,7 +227,7 @@ reap_old_sample_buffers(struct brw_context *brw) static void free_sample_bufs(struct brw_context *brw) { - foreach_list_typed_safe(struct brw_oa_sample_buf, buf, link, + foreach_list_typed_safe(struct oa_sample_buf, buf, link, &brw->perfquery.free_sample_buffers) ralloc_free(buf); @@ -532,8 +409,8 @@ drop_from_unaccumulated_query_list(struct brw_context *brw, * referenced by any other queries... */ - struct brw_oa_sample_buf *buf = - exec_node_data(struct brw_oa_sample_buf, obj->oa.samples_head, link); + struct oa_sample_buf *buf = + exec_node_data(struct oa_sample_buf, obj->oa.samples_head, link); assert(buf->refcount > 0); buf->refcount--; @@ -604,12 +481,12 @@ read_oa_samples_until(struct brw_context *brw, { struct exec_node *tail_node = exec_list_get_tail(&brw->perfquery.sample_buffers); - struct brw_oa_sample_buf *tail_buf = - exec_node_data(struct brw_oa_sample_buf, tail_node, link); + struct oa_sample_buf *tail_buf = + exec_node_data(struct oa_sample_buf, tail_node, link); uint32_t last_timestamp = tail_buf->last_timestamp; while (1) { - struct brw_oa_sample_buf *buf = get_free_sample_buf(brw); + struct oa_sample_buf *buf = get_free_sample_buf(brw); uint32_t offset; int len; @@ -764,7 +641,7 @@ accumulate_oa_reports(struct brw_context *brw, */ first_samples_node = obj->oa.samples_head->next; - foreach_list_typed_from(struct brw_oa_sample_buf, buf, link, + foreach_list_typed_from(struct oa_sample_buf, buf, link, &brw->perfquery.sample_buffers, first_samples_node) { @@ -1162,8 +1039,8 @@ brw_begin_perf_query(struct gl_context *ctx, assert(!exec_list_is_empty(&brw->perfquery.sample_buffers)); obj->oa.samples_head = exec_list_get_tail(&brw->perfquery.sample_buffers); - struct brw_oa_sample_buf *buf = - exec_node_data(struct brw_oa_sample_buf, obj->oa.samples_head, link); + struct oa_sample_buf *buf = + exec_node_data(struct oa_sample_buf, obj->oa.samples_head, link); /* This reference will ensure that future/following sample * buffers (that may relate to this query) can't be freed until @@ -1805,7 +1682,7 @@ brw_init_perf_query_info(struct gl_context *ctx) * Begin an OA query we can always take a reference on a buffer * in this list. */ - struct brw_oa_sample_buf *buf = get_free_sample_buf(brw); + struct oa_sample_buf *buf = get_free_sample_buf(brw); exec_list_push_head(&brw->perfquery.sample_buffers, &buf->link); brw->perfquery.oa_stream_fd = -1; |