diff options
Diffstat (limited to 'src/intel')
-rw-r--r-- | src/intel/perf/gen_perf.c | 282 | ||||
-rw-r--r-- | src/intel/perf/gen_perf.h | 61 |
2 files changed, 219 insertions, 124 deletions
diff --git a/src/intel/perf/gen_perf.c b/src/intel/perf/gen_perf.c index 5fb6044a859..4e29bc17c0a 100644 --- a/src/intel/perf/gen_perf.c +++ b/src/intel/perf/gen_perf.c @@ -61,6 +61,11 @@ #define GEN9_RPSTAT0_PREV_GT_FREQ_SHIFT 0 #define GEN9_RPSTAT0_PREV_GT_FREQ_MASK INTEL_MASK(8, 0) +#define GEN6_SO_PRIM_STORAGE_NEEDED 0x2280 +#define GEN7_SO_PRIM_STORAGE_NEEDED(n) (0x5240 + (n) * 8) +#define GEN6_SO_NUM_PRIMS_WRITTEN 0x2288 +#define GEN7_SO_NUM_PRIMS_WRITTEN(n) (0x5200 + (n) * 8) + #define MAP_READ (1 << 0) #define MAP_WRITE (1 << 1) @@ -165,13 +170,32 @@ read_sysfs_drm_device_file_uint64(struct gen_perf_config *perf, return read_file_uint64(buf, value); } +static inline struct gen_perf_query_info * +append_query_info(struct gen_perf_config *perf, int max_counters) +{ + struct gen_perf_query_info *query; + + perf->queries = reralloc(perf, perf->queries, + struct gen_perf_query_info, + ++perf->n_queries); + query = &perf->queries[perf->n_queries - 1]; + memset(query, 0, sizeof(*query)); + + if (max_counters > 0) { + query->max_counters = max_counters; + query->counters = + rzalloc_array(perf, struct gen_perf_query_counter, max_counters); + } + + return query; +} + static void register_oa_config(struct gen_perf_config *perf, const struct gen_perf_query_info *query, uint64_t config_id) { - struct gen_perf_query_info *registred_query = - gen_perf_query_append_query_info(perf, 0); + struct gen_perf_query_info *registred_query = append_query_info(perf, 0); *registred_query = *query; registred_query->oa_metrics_set_id = config_id; @@ -395,8 +419,123 @@ get_register_queries_function(const struct gen_device_info *devinfo) return NULL; } -bool -gen_perf_load_oa_metrics(struct gen_perf_config *perf, int fd, +static inline void +add_stat_reg(struct gen_perf_query_info *query, uint32_t reg, + uint32_t numerator, uint32_t denominator, + const char *name, const char *description) +{ + struct gen_perf_query_counter *counter; + + assert(query->n_counters < query->max_counters); + + counter = &query->counters[query->n_counters]; + counter->name = name; + counter->desc = description; + counter->type = GEN_PERF_COUNTER_TYPE_RAW; + counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64; + counter->offset = sizeof(uint64_t) * query->n_counters; + counter->pipeline_stat.reg = reg; + counter->pipeline_stat.numerator = numerator; + counter->pipeline_stat.denominator = denominator; + + query->n_counters++; +} + +static inline void +add_basic_stat_reg(struct gen_perf_query_info *query, + uint32_t reg, const char *name) +{ + add_stat_reg(query, reg, 1, 1, name, name); +} + +static void +load_pipeline_statistic_metrics(struct gen_perf_config *perf_cfg, + const struct gen_device_info *devinfo) +{ + struct gen_perf_query_info *query = + append_query_info(perf_cfg, MAX_STAT_COUNTERS); + + query->kind = GEN_PERF_QUERY_TYPE_PIPELINE; + query->name = "Pipeline Statistics Registers"; + + add_basic_stat_reg(query, IA_VERTICES_COUNT, + "N vertices submitted"); + add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, + "N primitives submitted"); + add_basic_stat_reg(query, VS_INVOCATION_COUNT, + "N vertex shader invocations"); + + if (devinfo->gen == 6) { + add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1, + "SO_PRIM_STORAGE_NEEDED", + "N geometry shader stream-out primitives (total)"); + add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1, + "SO_NUM_PRIMS_WRITTEN", + "N geometry shader stream-out primitives (written)"); + } else { + add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1, + "SO_PRIM_STORAGE_NEEDED (Stream 0)", + "N stream-out (stream 0) primitives (total)"); + add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1, + "SO_PRIM_STORAGE_NEEDED (Stream 1)", + "N stream-out (stream 1) primitives (total)"); + add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1, + "SO_PRIM_STORAGE_NEEDED (Stream 2)", + "N stream-out (stream 2) primitives (total)"); + add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1, + "SO_PRIM_STORAGE_NEEDED (Stream 3)", + "N stream-out (stream 3) primitives (total)"); + add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1, + "SO_NUM_PRIMS_WRITTEN (Stream 0)", + "N stream-out (stream 0) primitives (written)"); + add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1, + "SO_NUM_PRIMS_WRITTEN (Stream 1)", + "N stream-out (stream 1) primitives (written)"); + add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1, + "SO_NUM_PRIMS_WRITTEN (Stream 2)", + "N stream-out (stream 2) primitives (written)"); + add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1, + "SO_NUM_PRIMS_WRITTEN (Stream 3)", + "N stream-out (stream 3) primitives (written)"); + } + + add_basic_stat_reg(query, HS_INVOCATION_COUNT, + "N TCS shader invocations"); + add_basic_stat_reg(query, DS_INVOCATION_COUNT, + "N TES shader invocations"); + + add_basic_stat_reg(query, GS_INVOCATION_COUNT, + "N geometry shader invocations"); + add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, + "N geometry shader primitives emitted"); + + add_basic_stat_reg(query, CL_INVOCATION_COUNT, + "N primitives entering clipping"); + add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, + "N primitives leaving clipping"); + + if (devinfo->is_haswell || devinfo->gen == 8) { + add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, + "N fragment shader invocations", + "N fragment shader invocations"); + } else { + add_basic_stat_reg(query, PS_INVOCATION_COUNT, + "N fragment shader invocations"); + } + + add_basic_stat_reg(query, PS_DEPTH_COUNT, + "N z-pass fragments"); + + if (devinfo->gen >= 7) { + add_basic_stat_reg(query, CS_INVOCATION_COUNT, + "N compute shader invocations"); + } + + query->data_size = sizeof(uint64_t) * query->n_counters; +} + +static bool +load_oa_metrics(struct gen_perf_config *perf, int fd, const struct gen_device_info *devinfo) { perf_register_oa_queries_t oa_register = get_register_queries_function(devinfo); @@ -583,6 +722,62 @@ gen_perf_query_result_clear(struct gen_perf_query_result *result) } static void +gen_perf_query_register_mdapi_statistic_query(struct gen_perf_config *perf_cfg, + const struct gen_device_info *devinfo) +{ + if (!(devinfo->gen >= 7 && devinfo->gen <= 11)) + return; + + struct gen_perf_query_info *query = + append_query_info(perf_cfg, MAX_STAT_COUNTERS); + + query->kind = GEN_PERF_QUERY_TYPE_PIPELINE; + query->name = "Intel_Raw_Pipeline_Statistics_Query"; + + /* The order has to match mdapi_pipeline_metrics. */ + add_basic_stat_reg(query, IA_VERTICES_COUNT, + "N vertices submitted"); + add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, + "N primitives submitted"); + add_basic_stat_reg(query, VS_INVOCATION_COUNT, + "N vertex shader invocations"); + add_basic_stat_reg(query, GS_INVOCATION_COUNT, + "N geometry shader invocations"); + add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, + "N geometry shader primitives emitted"); + add_basic_stat_reg(query, CL_INVOCATION_COUNT, + "N primitives entering clipping"); + add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, + "N primitives leaving clipping"); + if (devinfo->is_haswell || devinfo->gen == 8) { + add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, + "N fragment shader invocations", + "N fragment shader invocations"); + } else { + add_basic_stat_reg(query, PS_INVOCATION_COUNT, + "N fragment shader invocations"); + } + add_basic_stat_reg(query, HS_INVOCATION_COUNT, + "N TCS shader invocations"); + add_basic_stat_reg(query, DS_INVOCATION_COUNT, + "N TES shader invocations"); + if (devinfo->gen >= 7) { + add_basic_stat_reg(query, CS_INVOCATION_COUNT, + "N compute shader invocations"); + } + + if (devinfo->gen >= 10) { + /* Reuse existing CS invocation register until we can expose this new + * one. + */ + add_basic_stat_reg(query, CS_INVOCATION_COUNT, + "Reserved1"); + } + + query->data_size = sizeof(uint64_t) * query->n_counters; +} + +static void fill_mdapi_perf_query_counter(struct gen_perf_query_info *query, const char *name, uint32_t data_offset, @@ -618,9 +813,9 @@ fill_mdapi_perf_query_counter(struct gen_perf_query_info *query, sizeof(struct_name.field_name[0]), \ GEN_PERF_COUNTER_DATA_TYPE_##type_name) -void -gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo, - struct gen_perf_config *perf) +static void +register_mdapi_oa_query(const struct gen_device_info *devinfo, + struct gen_perf_config *perf) { struct gen_perf_query_info *query = NULL; @@ -632,7 +827,7 @@ gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo, switch (devinfo->gen) { case 7: { - query = gen_perf_query_append_query_info(perf, 1 + 45 + 16 + 7); + query = append_query_info(perf, 1 + 45 + 16 + 7); query->oa_format = I915_OA_FORMAT_A45_B8_C8; struct gen7_mdapi_metrics metric_data; @@ -657,7 +852,7 @@ gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo, break; } case 8: { - query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16); + query = append_query_info(perf, 2 + 36 + 16 + 16); query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8; struct gen8_mdapi_metrics metric_data; @@ -694,7 +889,7 @@ gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo, case 9: case 10: case 11: { - query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2); + query = append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2); query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8; struct gen9_mdapi_metrics metric_data; @@ -756,62 +951,6 @@ gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo, } } -void -gen_perf_query_register_mdapi_statistic_query(const struct gen_device_info *devinfo, - struct gen_perf_config *perf) -{ - if (!(devinfo->gen >= 7 && devinfo->gen <= 11)) - return; - - struct gen_perf_query_info *query = - gen_perf_query_append_query_info(perf, MAX_STAT_COUNTERS); - - query->kind = GEN_PERF_QUERY_TYPE_PIPELINE; - query->name = "Intel_Raw_Pipeline_Statistics_Query"; - - /* The order has to match mdapi_pipeline_metrics. */ - gen_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT, - "N vertices submitted"); - gen_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, - "N primitives submitted"); - gen_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT, - "N vertex shader invocations"); - gen_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT, - "N geometry shader invocations"); - gen_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, - "N geometry shader primitives emitted"); - gen_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT, - "N primitives entering clipping"); - gen_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, - "N primitives leaving clipping"); - if (devinfo->is_haswell || devinfo->gen == 8) { - gen_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, - "N fragment shader invocations", - "N fragment shader invocations"); - } else { - gen_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT, - "N fragment shader invocations"); - } - gen_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT, - "N TCS shader invocations"); - gen_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT, - "N TES shader invocations"); - if (devinfo->gen >= 7) { - gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT, - "N compute shader invocations"); - } - - if (devinfo->gen >= 10) { - /* Reuse existing CS invocation register until we can expose this new - * one. - */ - gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT, - "Reserved1"); - } - - query->data_size = sizeof(uint64_t) * query->n_counters; -} - uint64_t gen_perf_query_get_metric_id(struct gen_perf_config *perf, const struct gen_perf_query_info *query) @@ -1012,6 +1151,17 @@ gen_perf_dec_n_users(struct gen_perf_context *perf_ctx) } void +gen_perf_init_metrics(struct gen_perf_config *perf_cfg, + const struct gen_device_info *devinfo, + int drm_fd) +{ + load_pipeline_statistic_metrics(perf_cfg, devinfo); + gen_perf_query_register_mdapi_statistic_query(perf_cfg, devinfo); + if (load_oa_metrics(perf_cfg, drm_fd, devinfo)) + register_mdapi_oa_query(devinfo, perf_cfg); +} + +void gen_perf_init_context(struct gen_perf_context *perf_ctx, struct gen_perf_config *perf_cfg, void * ctx, /* driver context (eg, brw_context) */ diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h index f66a088e260..68e2a985bad 100644 --- a/src/intel/perf/gen_perf.h +++ b/src/intel/perf/gen_perf.h @@ -505,6 +505,9 @@ struct gen_perf_context { int n_query_instances; }; +void gen_perf_init_metrics(struct gen_perf_config *perf_cfg, + const struct gen_device_info *devinfo, + int drm_fd); void gen_perf_init_context(struct gen_perf_context *perf_ctx, struct gen_perf_config *perf_cfg, void * ctx, /* driver context (eg, brw_context) */ @@ -532,58 +535,6 @@ gen_perf_query_counter_get_size(const struct gen_perf_query_counter *counter) } } -static inline struct gen_perf_query_info * -gen_perf_query_append_query_info(struct gen_perf_config *perf, int max_counters) -{ - struct gen_perf_query_info *query; - - perf->queries = reralloc(perf, perf->queries, - struct gen_perf_query_info, - ++perf->n_queries); - query = &perf->queries[perf->n_queries - 1]; - memset(query, 0, sizeof(*query)); - - if (max_counters > 0) { - query->max_counters = max_counters; - query->counters = - rzalloc_array(perf, struct gen_perf_query_counter, max_counters); - } - - return query; -} - -static inline void -gen_perf_query_info_add_stat_reg(struct gen_perf_query_info *query, - uint32_t reg, - uint32_t numerator, - uint32_t denominator, - const char *name, - const char *description) -{ - struct gen_perf_query_counter *counter; - - assert(query->n_counters < query->max_counters); - - counter = &query->counters[query->n_counters]; - counter->name = name; - counter->desc = description; - counter->type = GEN_PERF_COUNTER_TYPE_RAW; - counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_UINT64; - counter->offset = sizeof(uint64_t) * query->n_counters; - counter->pipeline_stat.reg = reg; - counter->pipeline_stat.numerator = numerator; - counter->pipeline_stat.denominator = denominator; - - query->n_counters++; -} - -static inline void -gen_perf_query_info_add_basic_stat_reg(struct gen_perf_query_info *query, - uint32_t reg, const char *name) -{ - gen_perf_query_info_add_stat_reg(query, reg, 1, 1, name, name); -} - static inline struct gen_perf_config * gen_perf_new(void *ctx) { @@ -591,8 +542,6 @@ gen_perf_new(void *ctx) return perf; } -bool gen_perf_load_oa_metrics(struct gen_perf_config *perf, int fd, - const struct gen_device_info *devinfo); bool gen_perf_load_metric_id(struct gen_perf_config *perf, const char *guid, uint64_t *metric_id); @@ -605,10 +554,6 @@ void gen_perf_query_result_accumulate(struct gen_perf_query_result *result, const uint32_t *start, const uint32_t *end); void gen_perf_query_result_clear(struct gen_perf_query_result *result); -void gen_perf_query_register_mdapi_statistic_query(const struct gen_device_info *devinfo, - struct gen_perf_config *perf); -void gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo, - struct gen_perf_config *perf); uint64_t gen_perf_query_get_metric_id(struct gen_perf_config *perf, const struct gen_perf_query_info *query); struct oa_sample_buf * gen_perf_get_free_sample_buf(struct gen_perf_context *perf); |