diff options
-rw-r--r-- | src/intel/perf/gen_perf.c | 102 | ||||
-rw-r--r-- | src/intel/perf/gen_perf.h | 71 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_performance_query.c | 63 |
5 files changed, 136 insertions, 103 deletions
diff --git a/src/intel/perf/gen_perf.c b/src/intel/perf/gen_perf.c
index 0bac1fd2888..03f29d5222f 100644
--- a/src/intel/perf/gen_perf.c
+++ b/src/intel/perf/gen_perf.c
@@ -189,6 +189,86 @@ struct oa_sample_buf {
    uint32_t last_timestamp;
 };
 
+struct gen_perf_context {
+   struct gen_perf_config *perf;
+
+   void * ctx;  /* driver context (e.g. brw_context) */
+   void * bufmgr;
+   const struct gen_device_info *devinfo;
+
+   uint32_t hw_ctx;
+   int drm_fd;
+
+   /* The i915 perf stream we open to set up + enable the OA counters */
+   int oa_stream_fd;
+
+   /* An i915 perf stream fd gives exclusive access to the OA unit that will
+    * report counter snapshots for a specific counter set/profile in a
+    * specific layout/format so we can only start OA queries that are
+    * compatible with the currently open fd...
+    */
+   int current_oa_metrics_set_id;
+   int current_oa_format;
+
+   /* List of buffers containing OA reports */
+   struct exec_list sample_buffers;
+
+   /* Cached list of empty sample buffers */
+   struct exec_list free_sample_buffers;
+
+   int n_active_oa_queries;
+   int n_active_pipeline_stats_queries;
+
+   /* The number of queries depending on running OA counters which
+    * extends beyond brw_end_perf_query() since we need to wait until
+    * the last MI_RPC command has been parsed by the GPU.
+    *
+    * Accurate accounting is important here as emitting an
+    * MI_REPORT_PERF_COUNT command while the OA unit is disabled will
+    * effectively hang the GPU.
+    */
+   int n_oa_users;
+
+   /* To help catch a spurious problem with the hardware or perf
+    * forwarding samples, we emit each MI_REPORT_PERF_COUNT command
+    * with a unique ID that we can explicitly check for...
+    */
+   int next_query_start_report_id;
+
+   /**
+    * An array of queries whose results haven't yet been assembled
+    * based on the data in buffer objects.
+    *
+    * These may be active, or have already ended. However, the
+    * results have not been requested.
+    */
+   struct gen_perf_query_object **unaccumulated;
+   int unaccumulated_elements;
+   int unaccumulated_array_size;
+
+   /* The total number of query objects so we can relinquish
+    * our exclusive access to perf if the application deletes
+    * all of its objects. (NB: We only disable perf while
+    * there are no active queries)
+    */
+   int n_query_instances;
+};
+
+struct gen_perf_context *
+gen_perf_new_context(void *parent)
+{
+   struct gen_perf_context *ctx = rzalloc(parent, struct gen_perf_context);
+   if (! ctx)
+      fprintf(stderr, "%s: failed to alloc context\n", __func__);
+   return ctx; /* may be NULL if the allocation above failed */
+}
+
+struct gen_perf_config *
+gen_perf_config(struct gen_perf_context *ctx)
+{
+   return ctx->perf;
+}
+
 struct gen_perf_query_object *
 gen_perf_new_query(struct gen_perf_context *perf_ctx, unsigned query_index)
 {
@@ -206,6 +286,28 @@ gen_perf_new_query(struct gen_perf_context *perf_ctx, unsigned query_index)
    return obj;
 }
 
+int
+gen_perf_active_queries(struct gen_perf_context *perf_ctx,
+                        const struct gen_perf_query_info *query)
+{
+   assert(perf_ctx->n_active_oa_queries == 0 || perf_ctx->n_active_pipeline_stats_queries == 0);
+
+   switch (query->kind) {
+   case GEN_PERF_QUERY_TYPE_OA:
+   case GEN_PERF_QUERY_TYPE_RAW:
+      return perf_ctx->n_active_oa_queries;
+      break;
+
+   case GEN_PERF_QUERY_TYPE_PIPELINE:
+      return perf_ctx->n_active_pipeline_stats_queries;
+      break;
+
+   default:
+      unreachable("Unknown query type");
+      break;
+   }
+}
+
 static bool
 get_sysfs_dev_dir(struct gen_perf_config *perf, int fd)
 {
diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h
index d4178900714..1038d5018a9 100644
--- a/src/intel/perf/gen_perf.h
+++ b/src/intel/perf/gen_perf.h
@@ -321,70 +321,8 @@ struct gen_perf_query_object
    };
 };
 
-struct gen_perf_context {
-   struct gen_perf_config *perf;
-
-   void * ctx;  /* driver context (eg, brw_context) */
-   void * bufmgr;
-   const struct gen_device_info *devinfo;
-
-   uint32_t hw_ctx;
-   int drm_fd;
-
-   /* The i915 perf stream we open to setup + enable the OA counters */
-   int oa_stream_fd;
-
-   /* An i915 perf stream fd gives exclusive access to the OA unit that will
-    * report counter snapshots for a specific counter set/profile in a
-    * specific layout/format so we can only start OA queries that are
-    * compatible with the currently open fd...
-    */
-   int current_oa_metrics_set_id;
-   int current_oa_format;
-
-   /* List of buffers containing OA reports */
-   struct exec_list sample_buffers;
-
-   /* Cached list of empty sample buffers */
-   struct exec_list free_sample_buffers;
-
-   int n_active_oa_queries;
-   int n_active_pipeline_stats_queries;
-
-   /* The number of queries depending on running OA counters which
-    * extends beyond brw_end_perf_query() since we need to wait until
-    * the last MI_RPC command has parsed by the GPU.
-    *
-    * Accurate accounting is important here as emitting an
-    * MI_REPORT_PERF_COUNT command while the OA unit is disabled will
-    * effectively hang the gpu.
-    */
-   int n_oa_users;
-
-   /* To help catch an spurious problem with the hardware or perf
-    * forwarding samples, we emit each MI_REPORT_PERF_COUNT command
-    * with a unique ID that we can explicitly check for...
-    */
-   int next_query_start_report_id;
-
-   /**
-    * An array of queries whose results haven't yet been assembled
-    * based on the data in buffer objects.
-    *
-    * These may be active, or have already ended. However, the
-    * results have not been requested.
-    */
-   struct gen_perf_query_object **unaccumulated;
-   int unaccumulated_elements;
-   int unaccumulated_array_size;
-
-   /* The total number of query objects so we can relinquish
-    * our exclusive access to perf if the application deletes
-    * all of its objects. (NB: We only disable perf while
-    * there are no active queries)
-    */
-   int n_query_instances;
-};
+struct gen_perf_context;
+struct gen_perf_context *gen_perf_new_context(void *parent);
 
 void gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
                            const struct gen_device_info *devinfo,
@@ -397,6 +335,11 @@ void gen_perf_init_context(struct gen_perf_context *perf_ctx,
                            uint32_t hw_ctx,
                            int drm_fd);
 
+struct gen_perf_config *gen_perf_config(struct gen_perf_context *ctx);
+
+int gen_perf_active_queries(struct gen_perf_context *perf_ctx,
+                            const struct gen_perf_query_info *query);
+
 static inline size_t
 gen_perf_query_counter_get_size(const struct gen_perf_query_counter *counter)
 {
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 957be4006f8..d4bbe0b2b6f 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -964,6 +964,7 @@ brwCreateContext(gl_api api,
       *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
       return false;
    }
+   brw->perf_ctx = gen_perf_new_context(brw); /* NOTE(review): NULL on alloc failure; no check visible in this hunk - confirm */
 
    driContextPriv->driverPrivate = brw;
    brw->driContext = driContextPriv;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index bf720bfa172..2ac443bf032 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1162,7 +1162,7 @@ struct brw_context
       bool supported;
    } predicate;
 
-   struct gen_perf_context perf_ctx;
+   struct gen_perf_context *perf_ctx;
 
    int num_atoms[BRW_NUM_PIPELINES];
    const struct brw_tracked_state render_atoms[76];
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c
index 80a2ba6f4b4..f7016e4dd15 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -112,7 +112,7 @@ static void
 dump_perf_query_callback(GLuint id, void *query_void, void *brw_void)
 {
    struct brw_context *ctx = brw_void;
-   struct gen_perf_context *perf_ctx = &ctx->perf_ctx;
+   struct gen_perf_context *perf_ctx = ctx->perf_ctx;
    struct gl_perf_query_object *o = query_void;
    struct brw_perf_query_object * brw_query = brw_perf_query(o);
    struct gen_perf_query_object *obj = brw_query->query;
@@ -128,7 +128,7 @@ static void
 dump_perf_queries(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->ctx;
-   gen_perf_dump_query_count(&brw->perf_ctx);
+   gen_perf_dump_query_count(brw->perf_ctx);
    _mesa_HashWalk(ctx->PerfQuery.Objects, dump_perf_query_callback, brw);
 }
 
@@ -144,28 +144,14 @@ brw_get_perf_query_info(struct gl_context *ctx,
                         GLuint *n_active)
 {
    struct brw_context *brw = brw_context(ctx);
-   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
-   const struct gen_perf_query_info *query =
-      &perf_ctx->perf->queries[query_index];
+   struct gen_perf_context *perf_ctx = brw->perf_ctx;
+   struct gen_perf_config *perf_cfg = gen_perf_config(perf_ctx);
+   const struct gen_perf_query_info *query = &perf_cfg->queries[query_index];
 
    *name = query->name;
    *data_size = query->data_size;
    *n_counters = query->n_counters;
-
-   switch (query->kind) {
-   case GEN_PERF_QUERY_TYPE_OA:
-   case GEN_PERF_QUERY_TYPE_RAW:
-      *n_active = perf_ctx->n_active_oa_queries;
-      break;
-
-   case GEN_PERF_QUERY_TYPE_PIPELINE:
-      *n_active = perf_ctx->n_active_pipeline_stats_queries;
-      break;
-
-   default:
-      unreachable("Unknown query type");
-      break;
-   }
+   *n_active = gen_perf_active_queries(perf_ctx, query);
 }
 
 static GLuint
@@ -213,8 +199,9 @@ brw_get_perf_counter_info(struct gl_context *ctx,
                           GLuint64 *raw_max)
 {
    struct brw_context *brw = brw_context(ctx);
+   struct gen_perf_config *perf_cfg = gen_perf_config(brw->perf_ctx);
    const struct gen_perf_query_info *query =
-      &brw->perf_ctx.perf->queries[query_index];
+      &perf_cfg->queries[query_index];
    const struct gen_perf_query_counter *counter =
       &query->counters[counter_index];
 
@@ -260,7 +247,7 @@ brw_begin_perf_query(struct gl_context *ctx,
    struct brw_context *brw = brw_context(ctx);
    struct brw_perf_query_object *brw_query = brw_perf_query(o);
    struct gen_perf_query_object *obj = brw_query->query;
-   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
+   struct gen_perf_context *perf_ctx = brw->perf_ctx;
 
    /* We can assume the frontend hides mistaken attempts to Begin a
     * query object multiple times before its End. Similarly if an
@@ -291,7 +278,7 @@ brw_end_perf_query(struct gl_context *ctx,
    struct brw_context *brw = brw_context(ctx);
    struct brw_perf_query_object *brw_query = brw_perf_query(o);
    struct gen_perf_query_object *obj = brw_query->query;
-   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
+   struct gen_perf_context *perf_ctx = brw->perf_ctx;
 
    DBG("End(%d)\n", o->Id);
    gen_perf_end_query(perf_ctx, obj);
@@ -306,7 +293,7 @@ brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o)
 
    assert(!o->Ready);
 
-   gen_perf_wait_query(&brw->perf_ctx, obj, &brw->batch);
+   gen_perf_wait_query(brw->perf_ctx, obj, &brw->batch);
 }
 
 static bool
@@ -320,7 +307,7 @@ brw_is_perf_query_ready(struct gl_context *ctx,
    if (o->Ready)
       return true;
 
-   return gen_perf_is_query_ready(&brw->perf_ctx, obj, &brw->batch);
+   return gen_perf_is_query_ready(brw->perf_ctx, obj, &brw->batch);
 }
 
 /**
@@ -349,7 +336,7 @@ brw_get_perf_query_data(struct gl_context *ctx,
     */
    assert(o->Ready);
 
-   gen_perf_get_query_data(&brw->perf_ctx, obj,
+   gen_perf_get_query_data(brw->perf_ctx, obj,
                            data_size, data, bytes_written);
 }
 
@@ -357,7 +344,7 @@ static struct gl_perf_query_object *
 brw_new_perf_query_object(struct gl_context *ctx, unsigned query_index)
 {
    struct brw_context *brw = brw_context(ctx);
-   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
+   struct gen_perf_context *perf_ctx = brw->perf_ctx;
    struct gen_perf_query_object * obj = gen_perf_new_query(perf_ctx, query_index);
    if (unlikely(!obj))
       return NULL;
@@ -380,7 +367,7 @@ brw_delete_perf_query(struct gl_context *ctx,
    struct brw_context *brw = brw_context(ctx);
    struct brw_perf_query_object *brw_query = brw_perf_query(o);
    struct gen_perf_query_object *obj = brw_query->query;
-   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
+   struct gen_perf_context *perf_ctx = brw->perf_ctx;
 
    /* We can assume that the frontend waits for a query to complete
     * before ever calling into here, so we don't have to worry about
@@ -482,12 +469,16 @@ brw_init_perf_query_info(struct gl_context *ctx)
    struct brw_context *brw = brw_context(ctx);
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
 
-   struct gen_perf_context *perf_ctx = &brw->perf_ctx;
-   if (perf_ctx->perf)
-      return perf_ctx->perf->n_queries;
+   struct gen_perf_context *perf_ctx = brw->perf_ctx;
+   struct gen_perf_config *perf_cfg = gen_perf_config(perf_ctx);
 
-   perf_ctx->perf = gen_perf_new(brw);
-   struct gen_perf_config *perf_cfg = perf_ctx->perf;
+   if (perf_cfg)
+      return perf_cfg->n_queries;
+
+   if (!oa_metrics_kernel_support(brw->screen->driScrnPriv->fd, devinfo))
+      return 0;
+
+   perf_cfg = gen_perf_new(ctx);
 
    perf_cfg->vtbl.bo_alloc = brw_oa_bo_alloc;
    perf_cfg->vtbl.bo_unreference = (bo_unreference_t)brw_bo_unreference;
@@ -507,11 +498,7 @@ brw_init_perf_query_info(struct gl_context *ctx)
 
    gen_perf_init_context(perf_ctx, perf_cfg, brw, brw->bufmgr, devinfo,
                          brw->hw_ctx, brw->screen->driScrnPriv->fd);
-
-   if (!oa_metrics_kernel_support(perf_ctx->drm_fd, devinfo))
-      return 0;
-
-   gen_perf_init_metrics(perf_cfg, devinfo, perf_ctx->drm_fd);
+   gen_perf_init_metrics(perf_cfg, devinfo, brw->screen->driScrnPriv->fd);
 
    return perf_cfg->n_queries;
 }
|