diff options
author | Mark Janes <[email protected]> | 2019-05-29 08:48:35 -0700 |
---|---|---|
committer | Mark Janes <[email protected]> | 2019-08-07 21:33:55 -0700 |
commit | e091f33990f9729bc4ccee7b3b04837f0d7b24b4 (patch) | |
tree | 62d54318024d6963b4d40545a4460a2fe28b7873 /src/intel | |
parent | 2446f5cfd8408a69b3ebf2543f08b3ca2fb000d7 (diff) |
intel/perf: enumerate query-based metrics in perf
Iris and i965 both need to enumerate the available metrics, so these
routines must be located in perf.
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/intel')
-rw-r--r-- | src/intel/perf/gen_perf.c | 232 | ||||
-rw-r--r-- | src/intel/perf/gen_perf.h | 4 |
2 files changed, 236 insertions, 0 deletions
diff --git a/src/intel/perf/gen_perf.c b/src/intel/perf/gen_perf.c index 9289accb91f..d28da495761 100644 --- a/src/intel/perf/gen_perf.c +++ b/src/intel/perf/gen_perf.c @@ -33,6 +33,7 @@ #include "common/gen_gem.h" #include "gen_perf.h" +#include "perf/gen_perf_mdapi.h" #include "perf/gen_perf_metrics.h" #include "dev/gen_debug.h" @@ -558,3 +559,234 @@ gen_perf_query_result_clear(struct gen_perf_query_result *result) memset(result, 0, sizeof(*result)); result->hw_id = 0xffffffff; /* invalid */ } + +static void +fill_mdapi_perf_query_counter(struct gen_perf_query_info *query, + const char *name, + uint32_t data_offset, + uint32_t data_size, + enum gen_perf_counter_data_type data_type) +{ + struct gen_perf_query_counter *counter = &query->counters[query->n_counters]; + + assert(query->n_counters <= query->max_counters); + + counter->name = name; + counter->desc = "Raw counter value"; + counter->type = GEN_PERF_COUNTER_TYPE_RAW; + counter->data_type = data_type; + counter->offset = data_offset; + + query->n_counters++; + + assert(counter->offset + gen_perf_query_counter_get_size(counter) <= query->data_size); +} + +#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \ + fill_mdapi_perf_query_counter(query, #field_name, \ + (uint8_t *) &struct_name.field_name - \ + (uint8_t *) &struct_name, \ + sizeof(struct_name.field_name), \ + GEN_PERF_COUNTER_DATA_TYPE_##type_name) +#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \ + fill_mdapi_perf_query_counter(query, \ + ralloc_asprintf(ctx, "%s%i", #field_name, idx), \ + (uint8_t *) &struct_name.field_name[idx] - \ + (uint8_t *) &struct_name, \ + sizeof(struct_name.field_name[0]), \ + GEN_PERF_COUNTER_DATA_TYPE_##type_name) + +void +gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo, + struct gen_perf_config *perf) +{ + struct gen_perf_query_info *query = NULL; + + /* MDAPI requires different structures for pretty much every generation + * (right now we have definitions for gen 7 to 11). + */ + if (!(devinfo->gen >= 7 && devinfo->gen <= 11)) + return; + + switch (devinfo->gen) { + case 7: { + query = gen_perf_query_append_query_info(perf, 1 + 45 + 16 + 7); + query->oa_format = I915_OA_FORMAT_A45_B8_C8; + + struct gen7_mdapi_metrics metric_data; + query->data_size = sizeof(metric_data); + + MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64); + for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) { + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, ACounters, i, UINT64); + } + for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) { + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, NOACounters, i, UINT64); + } + MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32); + break; + } + case 8: { + query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16); + query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8; + + struct gen8_mdapi_metrics metric_data; + query->data_size = sizeof(metric_data); + + MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64); + for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) { + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, OaCntr, i, UINT64); + } + for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) { + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, NoaCntr, i, UINT64); + } + MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32); + break; + } + case 9: + case 10: + case 11: { + query = gen_perf_query_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2); + query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8; + + struct gen9_mdapi_metrics metric_data; + query->data_size = sizeof(metric_data); + + MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64); + for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) { + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, OaCntr, i, UINT64); + } + for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) { + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, NoaCntr, i, UINT64); + } + MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32); + for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) { + MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query, + metric_data, UserCntr, i, UINT64); + } + MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32); + MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32); + break; + } + default: + unreachable("Unsupported gen"); + break; + } + + query->kind = GEN_PERF_QUERY_TYPE_RAW; + query->name = "Intel_Raw_Hardware_Counters_Set_0_Query"; + query->guid = GEN_PERF_QUERY_GUID_MDAPI; + + { + /* Accumulation buffer offsets copied from an actual query... */ + const struct gen_perf_query_info *copy_query = + &perf->queries[0]; + + query->gpu_time_offset = copy_query->gpu_time_offset; + query->gpu_clock_offset = copy_query->gpu_clock_offset; + query->a_offset = copy_query->a_offset; + query->b_offset = copy_query->b_offset; + query->c_offset = copy_query->c_offset; + } +} + +void +gen_perf_query_register_mdapi_statistic_query(const struct gen_device_info *devinfo, + struct gen_perf_config *perf) +{ + if (!(devinfo->gen >= 7 && devinfo->gen <= 11)) + return; + + struct gen_perf_query_info *query = + gen_perf_query_append_query_info(perf, MAX_STAT_COUNTERS); + + query->kind = GEN_PERF_QUERY_TYPE_PIPELINE; + query->name = "Intel_Raw_Pipeline_Statistics_Query"; + + /* The order has to match mdapi_pipeline_metrics. */ + gen_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT, + "N vertices submitted"); + gen_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, + "N primitives submitted"); + gen_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT, + "N vertex shader invocations"); + gen_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT, + "N geometry shader invocations"); + gen_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, + "N geometry shader primitives emitted"); + gen_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT, + "N primitives entering clipping"); + gen_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, + "N primitives leaving clipping"); + if (devinfo->is_haswell || devinfo->gen == 8) { + gen_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, + "N fragment shader invocations", + "N fragment shader invocations"); + } else { + gen_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT, + "N fragment shader invocations"); + } + gen_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT, + "N TCS shader invocations"); + gen_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT, + "N TES shader invocations"); + if (devinfo->gen >= 7) { + gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT, + "N compute shader invocations"); + } + + if (devinfo->gen >= 10) { + /* Reuse existing CS invocation register until we can expose this new + * one. + */ + gen_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT, + "Reserved1"); + } + + query->data_size = sizeof(uint64_t) * query->n_counters; +} + diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h index 2c34a37a08f..2a8d9310f22 100644 --- a/src/intel/perf/gen_perf.h +++ b/src/intel/perf/gen_perf.h @@ -316,6 +316,10 @@ void gen_perf_query_result_accumulate(struct gen_perf_query_result *result, const uint32_t *start, const uint32_t *end); void gen_perf_query_result_clear(struct gen_perf_query_result *result); +void gen_perf_query_register_mdapi_statistic_query(const struct gen_device_info *devinfo, + struct gen_perf_config *perf); +void gen_perf_query_register_mdapi_oa_query(const struct gen_device_info *devinfo, + struct gen_perf_config *perf); #endif /* GEN_PERF_H */ |