summary refs log tree commit diff stats
path: root/src/intel
diff options
context:
space:
mode:
author Mark Janes <[email protected]> 2019-08-02 17:17:54 -0700
committer Mark Janes <[email protected]> 2019-08-07 21:33:56 -0700
commit 9f5c160d822adc78986ec6b95ab61d2674edddd4 (patch)
tree 3724565b845667956bb65cafac688bec00519da4 /src/intel
parent 9f84efb452f810494e8ba78a68b56444e343e5f6 (diff)
intel/perf: move initialization of pipeline statistics metrics to gen_perf
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/intel')
-rw-r--r-- src/intel/perf/gen_perf.c 282
-rw-r--r-- src/intel/perf/gen_perf.h 61
2 files changed, 219 insertions, 124 deletions
diff --git a/src/intel/perf/gen_perf.c b/src/intel/perf/gen_perf.c
index 5fb6044a859..4e29bc17c0a 100644
--- a/src/intel/perf/gen_perf.c
+++ b/src/intel/perf/gen_perf.c
@@ -61,6 +61,11 @@
#define GEN9_RPSTAT0_PREV_GT_FREQ_SHIFT 0
#define GEN9_RPSTAT0_PREV_GT_FREQ_MASK INTEL_MASK(8, 0)
+#define GEN6_SO_PRIM_STORAGE_NEEDED 0x2280
+#define GEN7_SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8)
+#define GEN6_SO_NUM_PRIMS_WRITTEN 0x2288
+#define GEN7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8)
+
#define MAP_READ (1 << 0)
#define MAP_WRITE (1 << 1)
@@ -165,13 +170,32 @@ read_sysfs_drm_device_file_uint64(struct gen_perf_config *perf,
return read_file_uint64(buf, value);
}
+static inline struct gen_perf_query_info *
+append_query_info(struct gen_perf_config *perf, int max_counters)
+{
+ struct gen_perf_query_info *query;
+
+ perf->queries = reralloc(perf, perf->queries,
+ struct gen_perf_query_info,
+ ++perf->n_queries);
+ query = &perf->queries[perf->n_queries - 1];
+ memset(query, 0, sizeof(*query));
+
+ if (max_counters > 0) {
+ query->max_counters = max_counters;
+ query->counters =
+ rzalloc_array(perf, struct gen_perf_query_counter, max_counters);
+ }
+
+ return query;
+}
+
static void
register_oa_config(struct gen_perf_config *perf,
const struct gen_perf_query_info *query,
uint64_t config_id)
{
- struct gen_perf_query_info *registred_query =
- gen_perf_query_append_query_info(perf, 0);
+ struct gen_perf_query_info *registred_query = append_query_info(perf, 0);
*registred_query = *query;
registred_query->oa_metrics_set_id = config_id;
@@ -395,8 +419,123 @@ get_register_queries_function(const struct gen_device_info *devinfo)
return NULL;
}
-bool
-gen_perf_load_oa_metrics(struct gen_perf_config *perf, int fd,
+static inline void
+add_stat_reg(struct gen_perf_query_info *query, uint32_t reg,
+ uint32_t numerator, uint32_t denominator,
+ const char *name, const char *description)
+{
+ struct gen_perf_query_counter *counter;
+
+ assert(query->n_counters < query->max_counters);
+
+ counter = &query->counters[query->n_counters];
+ counter->name = name;
+ counter->desc = description;
+ counter->type = GEN_PERF_COUNTER_TYPE_RAW;
+ counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64;
+ counter->offset = sizeof(uint64_t) * query->n_counters;
+ counter->pipeline_stat.reg = reg;
+ counter->pipeline_stat.numerator = numerator;
+ counter->pipeline_stat.denominator = denominator;
+
+ query->n_counters++;
+}
+
+static inline void
+add_basic_stat_reg(struct gen_perf_query_info *query,
+ uint32_t reg, const char *name)
+{
+ add_stat_reg(query, reg, 1, 1, name, name);
+}
+
+static void
+load_pipeline_statistic_metrics(struct gen_perf_config *perf_cfg,
+ const struct gen_device_info *devinfo)
+{
+ struct gen_perf_query_info *query =
+ append_query_info(perf_cfg, MAX_STAT_COUNTERS);
+
+ query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
+ query->name = "Pipeline Statistics Registers";
+
+ add_basic_stat_reg(query, IA_VERTICES_COUNT,
+ "N vertices submitted");
+ add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
+ "N primitives submitted");
+ add_basic_stat_reg(query, VS_INVOCATION_COUNT,
+ "N vertex shader invocations");
+
+ if (devinfo->gen == 6) {
+ add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1,
+ "SO_PRIM_STORAGE_NEEDED",
+ "N geometry shader stream-out primitives (total)");
+ add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1,
+ "SO_NUM_PRIMS_WRITTEN",
+ "N geometry shader stream-out primitives (written)");
+ } else {
+ add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1,
+ "SO_PRIM_STORAGE_NEEDED (Stream 0)",
+ "N stream-out (stream 0) primitives (total)");
+ add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1,
+ "SO_PRIM_STORAGE_NEEDED (Stream 1)",
+ "N stream-out (stream 1) primitives (total)");
+ add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1,
+ "SO_PRIM_STORAGE_NEEDED (Stream 2)",
+ "N stream-out (stream 2) primitives (total)");
+ add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1,
+ "SO_PRIM_STORAGE_NEEDED (Stream 3)",
+ "N stream-out (stream 3) primitives (total)");
+ add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1,
+ "SO_NUM_PRIMS_WRITTEN (Stream 0)",
+ "N stream-out (stream 0) primitives (written)");
+ add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1,
+ "SO_NUM_PRIMS_WRITTEN (Stream 1)",
+ "N stream-out (stream 1) primitives (written)");
+ add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1,
+ "SO_NUM_PRIMS_WRITTEN (Stream 2)",
+ "N stream-out (stream 2) primitives (written)");
+ add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1,
+ "SO_NUM_PRIMS_WRITTEN (Stream 3)",
+ "N stream-out (stream 3) primitives (written)");
+ }
+
+ add_basic_stat_reg(query, HS_INVOCATION_COUNT,
+ "N TCS shader invocations");
+ add_basic_stat_reg(query, DS_INVOCATION_COUNT,
+ "N TES shader invocations");
+
+ add_basic_stat_reg(query, GS_INVOCATION_COUNT,
+ "N geometry shader invocations");
+ add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
+ "N geometry shader primitives emitted");
+
+ add_basic_stat_reg(query, CL_INVOCATION_COUNT,
+ "N primitives entering clipping");
+ add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
+ "N primitives leaving clipping");
+
+ if (devinfo->is_haswell || devinfo->gen == 8) {
+ add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
+ "N fragment shader invocations",
+ "N fragment shader invocations");
+ } else {
+ add_basic_stat_reg(query, PS_INVOCATION_COUNT,
+ "N fragment shader invocations");
+ }
+
+ add_basic_stat_reg(query, PS_DEPTH_COUNT,
+ "N z-pass fragments");
+
+ if (devinfo->gen >= 7) {
+ add_basic_stat_reg(query, CS_INVOCATION_COUNT,
+ "N compute shader invocations");
+ }
+
+ query->data_size = sizeof(uint64_t) * query->n_counters;
+}
+
+static bool
+load_oa_metrics(struct gen_perf_config *perf, int fd,
const struct gen_device_info *devinfo)
{
perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo);
@@ -583,6 +722,62 @@ gen_perf_query_result_clear(struct gen_perf_query_result *result)
}
static void
+gen_perf_query_register_mdapi_statistic_query(struct gen_perf_config *perf_cfg,
+ const struct gen_device_info *devinfo)
+{
+ if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
+ return;
+
+ struct gen_perf_query_info *query =
+ append_query_info(perf_cfg, MAX_STAT_COUNTERS);
+
+ query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
+ query->name = "Intel_Raw_Pipeline_Statistics_Query";
+
+ /* The order has to match mdapi_pipeline_metrics. */
+ add_basic_stat_reg(query, IA_VERTICES_COUNT,
+ "N vertices submitted");
+ add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
+ "N primitives submitted");
+ add_basic_stat_reg(query, VS_INVOCATION_COUNT,
+ "N vertex shader invocations");
+ add_basic_stat_reg(query, GS_INVOCATION_COUNT,
+ "N geometry shader invocations");
+ add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
+ "N geometry shader primitives emitted");
+ add_basic_stat_reg(query, CL_INVOCATION_COUNT,
+ "N primitives entering clipping");
+ add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
+ "N primitives leaving clipping");
+ if (devinfo->is_haswell || devinfo->gen == 8) {
+ add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
+ "N fragment shader invocations",
+ "N fragment shader invocations");
+ } else {
+ add_basic_stat_reg(query, PS_INVOCATION_COUNT,
+ "N fragment shader invocations");
+ }
+ add_basic_stat_reg(query, HS_INVOCATION_COUNT,
+ "N TCS shader invocations");
+ add_basic_stat_reg(query, DS_INVOCATION_COUNT,
+ "N TES shader invocations");
+ if (devinfo->gen >= 7) {
+ add_basic_stat_reg(query, CS_INVOCATION_COUNT,
+ "N compute shader invocations");
+ }
+
+ if (devinfo->gen >= 10) {
+ /* Reuse existing CS invocation register until we can expose this new
+ * one.
+ */
+ add_basic_stat_reg(query, CS_INVOCATION_COUNT,
+ "Reserved1");
+ }
+
+ query->data_size = sizeof(uint64_t) * query->n_counters;
+}
+
+static void
fill_mdapi_perf_query_counter(struct gen_perf_query_info *query,
const char *name,
uint32_t data_offset,
@@ -618,9 +813,9 @@ fill_mdapi_perf_query_counter(struct gen_perf_query_info *query,
sizeof(struct_name.field_name[0]), \
GEN_PERF_COUNTER_DATA_TYPE_##type_name)
-void
-gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo,
- struct gen_perf_config *perf)
+static void
+register_mdapi_oa_query(const struct gen_device_info *devinfo,
+ struct gen_perf_config *perf)
{
struct gen_perf_query_info *query = NULL;
@@ -632,7 +827,7 @@ gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo,
switch (devinfo->gen) {
case 7: {
- query = gen_perf_query_append_query_info(perf, 1 + 45 + 16 + 7);
+ query = append_query_info(perf, 1 + 45 + 16 + 7);
query->oa_format = I915_OA_FORMAT_A45_B8_C8;
struct gen7_mdapi_metrics metric_data;
@@ -657,7 +852,7 @@ gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo,
break;
}
case 8: {
- query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16);
+ query = append_query_info(perf, 2 + 36 + 16 + 16);
query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
struct gen8_mdapi_metrics metric_data;
@@ -694,7 +889,7 @@ gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo,
case 9:
case 10:
case 11: {
- query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
+ query = append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);
query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
struct gen9_mdapi_metrics metric_data;
@@ -756,62 +951,6 @@ gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo,
}
}
-void
-gen_perf_query_register_mdapi_statistic_query(const struct gen_device_info *devinfo,
- struct gen_perf_config *perf)
-{
- if (!(devinfo->gen >= 7 && devinfo->gen <= 11))
- return;
-
- struct gen_perf_query_info *query =
- gen_perf_query_append_query_info(perf, MAX_STAT_COUNTERS);
-
- query->kind = GEN_PERF_QUERY_TYPE_PIPELINE;
- query->name = "Intel_Raw_Pipeline_Statistics_Query";
-
- /* The order has to match mdapi_pipeline_metrics. */
- gen_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT,
- "N vertices submitted");
- gen_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,
- "N primitives submitted");
- gen_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT,
- "N vertex shader invocations");
- gen_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT,
- "N geometry shader invocations");
- gen_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,
- "N geometry shader primitives emitted");
- gen_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT,
- "N primitives entering clipping");
- gen_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,
- "N primitives leaving clipping");
- if (devinfo->is_haswell || devinfo->gen == 8) {
- gen_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,
- "N fragment shader invocations",
- "N fragment shader invocations");
- } else {
- gen_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT,
- "N fragment shader invocations");
- }
- gen_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT,
- "N TCS shader invocations");
- gen_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT,
- "N TES shader invocations");
- if (devinfo->gen >= 7) {
- gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
- "N compute shader invocations");
- }
-
- if (devinfo->gen >= 10) {
- /* Reuse existing CS invocation register until we can expose this new
- * one.
- */
- gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT,
- "Reserved1");
- }
-
- query->data_size = sizeof(uint64_t) * query->n_counters;
-}
-
uint64_t
gen_perf_query_get_metric_id(struct gen_perf_config *perf,
const struct gen_perf_query_info *query)
@@ -1012,6 +1151,17 @@ gen_perf_dec_n_users(struct gen_perf_context *perf_ctx)
}
void
+gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
+ const struct gen_device_info *devinfo,
+ int drm_fd)
+{
+ load_pipeline_statistic_metrics(perf_cfg, devinfo);
+ gen_perf_query_register_mdapi_statistic_query(perf_cfg, devinfo);
+ if (load_oa_metrics(perf_cfg, drm_fd, devinfo))
+ register_mdapi_oa_query(devinfo, perf_cfg);
+}
+
+void
gen_perf_init_context(struct gen_perf_context *perf_ctx,
struct gen_perf_config *perf_cfg,
void * ctx, /* driver context (eg, brw_context) */
diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h
index f66a088e260..68e2a985bad 100644
--- a/src/intel/perf/gen_perf.h
+++ b/src/intel/perf/gen_perf.h
@@ -505,6 +505,9 @@ struct gen_perf_context {
int n_query_instances;
};
+void gen_perf_init_metrics(struct gen_perf_config *perf_cfg,
+ const struct gen_device_info *devinfo,
+ int drm_fd);
void gen_perf_init_context(struct gen_perf_context *perf_ctx,
struct gen_perf_config *perf_cfg,
void * ctx, /* driver context (eg, brw_context) */
@@ -532,58 +535,6 @@ gen_perf_query_counter_get_size(const struct gen_perf_query_counter *counter)
}
}
-static inline struct gen_perf_query_info *
-gen_perf_query_append_query_info(struct gen_perf_config *perf, int max_counters)
-{
- struct gen_perf_query_info *query;
-
- perf->queries = reralloc(perf, perf->queries,
- struct gen_perf_query_info,
- ++perf->n_queries);
- query = &perf->queries[perf->n_queries - 1];
- memset(query, 0, sizeof(*query));
-
- if (max_counters > 0) {
- query->max_counters = max_counters;
- query->counters =
- rzalloc_array(perf, struct gen_perf_query_counter, max_counters);
- }
-
- return query;
-}
-
-static inline void
-gen_perf_query_info_add_stat_reg(struct gen_perf_query_info *query,
- uint32_t reg,
- uint32_t numerator,
- uint32_t denominator,
- const char *name,
- const char *description)
-{
- struct gen_perf_query_counter *counter;
-
- assert(query->n_counters < query->max_counters);
-
- counter = &query->counters[query->n_counters];
- counter->name = name;
- counter->desc = description;
- counter->type = GEN_PERF_COUNTER_TYPE_RAW;
- counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64;
- counter->offset = sizeof(uint64_t) * query->n_counters;
- counter->pipeline_stat.reg = reg;
- counter->pipeline_stat.numerator = numerator;
- counter->pipeline_stat.denominator = denominator;
-
- query->n_counters++;
-}
-
-static inline void
-gen_perf_query_info_add_basic_stat_reg(struct gen_perf_query_info *query,
- uint32_t reg, const char *name)
-{
- gen_perf_query_info_add_stat_reg(query, reg, 1, 1, name, name);
-}
-
static inline struct gen_perf_config *
gen_perf_new(void *ctx)
{
@@ -591,8 +542,6 @@ gen_perf_new(void *ctx)
return perf;
}
-bool gen_perf_load_oa_metrics(struct gen_perf_config *perf, int fd,
- const struct gen_device_info *devinfo);
bool gen_perf_load_metric_id(struct gen_perf_config *perf, const char *guid,
uint64_t *metric_id);
@@ -605,10 +554,6 @@ void gen_perf_query_result_accumulate(struct gen_perf_query_result *result,
const uint32_t *start,
const uint32_t *end);
void gen_perf_query_result_clear(struct gen_perf_query_result *result);
-void gen_perf_query_register_mdapi_statistic_query(const struct gen_device_info *devinfo,
- struct gen_perf_config *perf);
-void gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo,
- struct gen_perf_config *perf);
uint64_t gen_perf_query_get_metric_id(struct gen_perf_config *perf,
const struct gen_perf_query_info *query);
struct oa_sample_buf * gen_perf_get_free_sample_buf(struct gen_perf_context *perf);