diff options
author | Lionel Landwerlin <[email protected]> | 2018-06-08 15:29:51 +0100 |
---|---|---|
committer | Lionel Landwerlin <[email protected]> | 2019-04-17 14:10:42 +0100 |
commit | 41b54b5faf27632d1a70145880b474fbaeabbb2d (patch) | |
tree | dbd18223647e6bfd431aca51467cd1e7eea2c471 /src/mesa | |
parent | f6bba7760f7985e08dccde3472e53d62fa2240d0 (diff) |
i965: move OA accumulation code to intel/perf
We'll want to reuse this in our Vulkan extension.
Signed-off-by: Lionel Landwerlin <[email protected]>
Reviewed-by: Mark Janes <[email protected]>
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_performance_query.c | 134 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_performance_query.h | 37 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c | 41 |
3 files changed, 45 insertions, 167 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 3207be11569..d3ae3c114f8 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -542,55 +542,6 @@ drop_from_unaccumulated_query_list(struct brw_context *brw, reap_old_sample_buffers(brw); } -/** - * Given pointers to starting and ending OA snapshots, add the deltas for each - * counter to the results. - */ -static void -add_deltas(struct brw_context *brw, - struct brw_perf_query_object *obj, - const uint32_t *start, - const uint32_t *end) -{ - const struct gen_perf_query_info *query = obj->query; - uint64_t *accumulator = obj->oa.accumulator; - int idx = 0; - int i; - - obj->oa.reports_accumulated++; - - switch (query->oa_format) { - case I915_OA_FORMAT_A32u40_A4u32_B8_C8: - gen_perf_query_accumulate_uint32(start + 1, end + 1, accumulator + idx++); /* timestamp */ - gen_perf_query_accumulate_uint32(start + 3, end + 3, accumulator + idx++); /* clock */ - - /* 32x 40bit A counters... */ - for (i = 0; i < 32; i++) - gen_perf_query_accumulate_uint40(i, start, end, accumulator + idx++); - - /* 4x 32bit A counters... */ - for (i = 0; i < 4; i++) - gen_perf_query_accumulate_uint32(start + 36 + i, end + 36 + i, - accumulator + idx++); - - /* 8x 32bit B counters + 8x 32bit C counters... */ - for (i = 0; i < 16; i++) - gen_perf_query_accumulate_uint32(start + 48 + i, end + 48 + i, - accumulator + idx++); - - break; - case I915_OA_FORMAT_A45_B8_C8: - gen_perf_query_accumulate_uint32(start + 1, end + 1, accumulator); /* timestamp */ - - for (i = 0; i < 61; i++) - gen_perf_query_accumulate_uint32(start + 3 + i, end + 3 + i, accumulator + 1 + i); - - break; - default: - unreachable("Can't accumulate OA counters in unknown format"); - } -} - static bool inc_n_oa_users(struct brw_context *brw) { @@ -801,8 +752,6 @@ accumulate_oa_reports(struct brw_context *brw, goto error; } - obj->oa.hw_id = start[2]; - /* See if we have any periodic reports to accumulate too... */ /* N.B. The oa.samples_head was set when the query began and @@ -856,11 +805,11 @@ accumulate_oa_reports(struct brw_context *brw, * of OA counters while any other context is acctive. */ if (devinfo->gen >= 8) { - if (in_ctx && report[2] != obj->oa.hw_id) { + if (in_ctx && report[2] != obj->oa.result.hw_id) { DBG("i915 perf: Switch AWAY (observed by ID change)\n"); in_ctx = false; out_duration = 0; - } else if (in_ctx == false && report[2] == obj->oa.hw_id) { + } else if (in_ctx == false && report[2] == obj->oa.result.hw_id) { DBG("i915 perf: Switch TO\n"); in_ctx = true; @@ -877,18 +826,20 @@ accumulate_oa_reports(struct brw_context *brw, if (out_duration >= 1) add = false; } else if (in_ctx) { - assert(report[2] == obj->oa.hw_id); + assert(report[2] == obj->oa.result.hw_id); DBG("i915 perf: Continuation IN\n"); } else { - assert(report[2] != obj->oa.hw_id); + assert(report[2] != obj->oa.result.hw_id); DBG("i915 perf: Continuation OUT\n"); add = false; out_duration++; } } - if (add) - add_deltas(brw, obj, last, report); + if (add) { + gen_perf_query_result_accumulate(&obj->oa.result, obj->query, + last, report); + } last = report; @@ -907,7 +858,8 @@ accumulate_oa_reports(struct brw_context *brw, end: - add_deltas(brw, obj, last, end); + gen_perf_query_result_accumulate(&obj->oa.result, obj->query, + last, end); DBG("Marking %d accumulated - results gathered\n", o->Id); @@ -1211,8 +1163,7 @@ brw_begin_perf_query(struct gl_context *ctx, */ buf->refcount++; - obj->oa.hw_id = 0xffffffff; - memset(obj->oa.accumulator, 0, sizeof(obj->oa.accumulator)); + gen_perf_query_result_clear(&obj->oa.result); obj->oa.results_accumulated = false; add_to_unaccumulated_query_list(brw, obj); @@ -1382,61 +1333,14 @@ brw_is_perf_query_ready(struct gl_context *ctx, } static void -gen8_read_report_clock_ratios(const uint32_t *report, - uint64_t *slice_freq_hz, - uint64_t *unslice_freq_hz) -{ - /* The lower 16bits of the RPT_ID field of the OA reports contains a - * snapshot of the bits coming from the RP_FREQ_NORMAL register and is - * divided this way : - * - * RPT_ID[31:25]: RP_FREQ_NORMAL[20:14] (low squashed_slice_clock_frequency) - * RPT_ID[10:9]: RP_FREQ_NORMAL[22:21] (high squashed_slice_clock_frequency) - * RPT_ID[8:0]: RP_FREQ_NORMAL[31:23] (squashed_unslice_clock_frequency) - * - * RP_FREQ_NORMAL[31:23]: Software Unslice Ratio Request - * Multiple of 33.33MHz 2xclk (16 MHz 1xclk) - * - * RP_FREQ_NORMAL[22:14]: Software Slice Ratio Request - * Multiple of 33.33MHz 2xclk (16 MHz 1xclk) - */ - - uint32_t unslice_freq = report[0] & 0x1ff; - uint32_t slice_freq_low = (report[0] >> 25) & 0x7f; - uint32_t slice_freq_high = (report[0] >> 9) & 0x3; - uint32_t slice_freq = slice_freq_low | (slice_freq_high << 7); - - *slice_freq_hz = slice_freq * 16666667ULL; - *unslice_freq_hz = unslice_freq * 16666667ULL; -} - -static void read_slice_unslice_frequencies(struct brw_context *brw, struct brw_perf_query_object *obj) { const struct gen_device_info *devinfo = &brw->screen->devinfo; - uint32_t *begin_report, *end_report; - - /* Slice/Unslice frequency is only available in the OA reports when the - * "Disable OA reports due to clock ratio change" field in - * OA_DEBUG_REGISTER is set to 1. This is how the kernel programs this - * global register (see drivers/gpu/drm/i915/i915_perf.c) - * - * Documentation says this should be available on Gen9+ but experimentation - * shows that Gen8 reports similar values, so we enable it there too. - */ - if (devinfo->gen < 8) - return; - - begin_report = obj->oa.map; - end_report = obj->oa.map + MI_RPC_BO_END_OFFSET_BYTES; + uint32_t *begin_report = obj->oa.map, *end_report = obj->oa.map + MI_RPC_BO_END_OFFSET_BYTES; - gen8_read_report_clock_ratios(begin_report, - &obj->oa.slice_frequency[0], - &obj->oa.unslice_frequency[0]); - gen8_read_report_clock_ratios(end_report, - &obj->oa.slice_frequency[1], - &obj->oa.unslice_frequency[1]); + gen_perf_query_result_read_frequencies(&obj->oa.result, + devinfo, begin_report, end_report); } static void @@ -1488,13 +1392,15 @@ get_oa_counter_data(struct brw_context *brw, switch (counter->data_type) { case GEN_PERF_COUNTER_DATA_TYPE_UINT64: out_uint64 = (uint64_t *)(data + counter->offset); - *out_uint64 = counter->oa_counter_read_uint64(perf, query, - obj->oa.accumulator); + *out_uint64 = + counter->oa_counter_read_uint64(perf, query, + obj->oa.result.accumulator); break; case GEN_PERF_COUNTER_DATA_TYPE_FLOAT: out_float = (float *)(data + counter->offset); - *out_float = counter->oa_counter_read_float(perf, query, - obj->oa.accumulator); + *out_float = + counter->oa_counter_read_float(perf, query, + obj->oa.result.accumulator); break; default: /* So far we aren't using uint32, double or bool32... */ diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.h b/src/mesa/drivers/dri/i965/brw_performance_query.h index ca0503422ca..86632e06a61 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.h +++ b/src/mesa/drivers/dri/i965/brw_performance_query.h @@ -28,6 +28,8 @@ #include "brw_context.h" +#include "perf/gen_perf.h" + struct gen_perf_query_info; /* @@ -38,15 +40,6 @@ struct gen_perf_query_info; #define STATS_BO_END_OFFSET_BYTES (STATS_BO_SIZE / 2) #define MAX_STAT_COUNTERS (STATS_BO_END_OFFSET_BYTES / 8) -/* - * The largest OA formats we can use include: - * For Haswell: - * 1 timestamp, 45 A counters, 8 B counters and 8 C counters. - * For Gen8+ - * 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters - */ -#define MAX_OA_REPORT_COUNTERS 62 - /** * i965 representation of a performance query object. * @@ -94,16 +87,6 @@ struct brw_perf_query_object struct exec_node *samples_head; /** - * Storage for the final accumulated OA counters. - */ - uint64_t accumulator[MAX_OA_REPORT_COUNTERS]; - - /** - * Hw ID used by the context on which the query was running. - */ - uint32_t hw_id; - - /** * false while in the unaccumulated_elements list, and set to * true when the final, end MI_RPC snapshot has been * accumulated. @@ -111,26 +94,14 @@ struct brw_perf_query_object bool results_accumulated; /** - * Number of reports accumulated to produce the results. - */ - uint32_t reports_accumulated; - - /** * Frequency of the GT at begin and end of the query. */ uint64_t gt_frequency[2]; /** - * Frequency in the slices of the GT at the begin and end of the - * query. - */ - uint64_t slice_frequency[2]; - - /** - * Frequency in the unslice of the GT at the begin and end of the - * query. + * Accumulated OA results between begin and end of the query. */ - uint64_t unslice_frequency[2]; + struct gen_perf_query_result result; } oa; struct { diff --git a/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c b/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c index 916b14c14ba..159f31441c5 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query_mdapi.c @@ -34,6 +34,7 @@ brw_perf_query_get_mdapi_oa_data(struct brw_context *brw, uint8_t *data) { const struct gen_device_info *devinfo = &brw->screen->devinfo; + const struct gen_perf_query_result *result = &obj->oa.result; switch (devinfo->gen) { case 7: { @@ -45,15 +46,15 @@ brw_perf_query_get_mdapi_oa_data(struct brw_context *brw, assert(devinfo->is_haswell); for (int i = 0; i < ARRAY_SIZE(mdapi_data->ACounters); i++) - mdapi_data->ACounters[i] = obj->oa.accumulator[1 + i]; + mdapi_data->ACounters[i] = result->accumulator[1 + i]; for (int i = 0; i < ARRAY_SIZE(mdapi_data->NOACounters); i++) { mdapi_data->NOACounters[i] = - obj->oa.accumulator[1 + ARRAY_SIZE(mdapi_data->ACounters) + i]; + result->accumulator[1 + ARRAY_SIZE(mdapi_data->ACounters) + i]; } - mdapi_data->ReportsCount = obj->oa.reports_accumulated; - mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]); + mdapi_data->ReportsCount = result->reports_accumulated; + mdapi_data->TotalTime = brw_timebase_scale(brw, result->accumulator[0]); mdapi_data->CoreFrequency = obj->oa.gt_frequency[1]; mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1]; return sizeof(*mdapi_data); @@ -65,20 +66,20 @@ brw_perf_query_get_mdapi_oa_data(struct brw_context *brw, return 0; for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++) - mdapi_data->OaCntr[i] = obj->oa.accumulator[2 + i]; + mdapi_data->OaCntr[i] = result->accumulator[2 + i]; for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) { mdapi_data->NoaCntr[i] = - obj->oa.accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i]; + result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i]; } - mdapi_data->ReportId = obj->oa.hw_id; - mdapi_data->ReportsCount = obj->oa.reports_accumulated; - mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]); - mdapi_data->GPUTicks = obj->oa.accumulator[1]; + mdapi_data->ReportId = result->hw_id; + mdapi_data->ReportsCount = result->reports_accumulated; + mdapi_data->TotalTime = brw_timebase_scale(brw, result->accumulator[0]); + mdapi_data->GPUTicks = result->accumulator[1]; mdapi_data->CoreFrequency = obj->oa.gt_frequency[1]; mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1]; - mdapi_data->SliceFrequency = (obj->oa.slice_frequency[0] + obj->oa.slice_frequency[1]) / 2ULL; - mdapi_data->UnsliceFrequency = (obj->oa.unslice_frequency[0] + obj->oa.unslice_frequency[1]) / 2ULL; + mdapi_data->SliceFrequency = (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL; + mdapi_data->UnsliceFrequency = (result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL; return sizeof(*mdapi_data); } @@ -91,20 +92,20 @@ brw_perf_query_get_mdapi_oa_data(struct brw_context *brw, return 0; for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++) - mdapi_data->OaCntr[i] = obj->oa.accumulator[2 + i]; + mdapi_data->OaCntr[i] = result->accumulator[2 + i]; for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) { mdapi_data->NoaCntr[i] = - obj->oa.accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i]; + result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i]; } - mdapi_data->ReportId = obj->oa.hw_id; - mdapi_data->ReportsCount = obj->oa.reports_accumulated; - mdapi_data->TotalTime = brw_timebase_scale(brw, obj->oa.accumulator[0]); - mdapi_data->GPUTicks = obj->oa.accumulator[1]; + mdapi_data->ReportId = result->hw_id; + mdapi_data->ReportsCount = result->reports_accumulated; + mdapi_data->TotalTime = brw_timebase_scale(brw, result->accumulator[0]); + mdapi_data->GPUTicks = result->accumulator[1]; mdapi_data->CoreFrequency = obj->oa.gt_frequency[1]; mdapi_data->CoreFrequencyChanged = obj->oa.gt_frequency[0] != obj->oa.gt_frequency[1]; - mdapi_data->SliceFrequency = (obj->oa.slice_frequency[0] + obj->oa.slice_frequency[1]) / 2ULL; - mdapi_data->UnsliceFrequency = (obj->oa.unslice_frequency[0] + obj->oa.unslice_frequency[1]) / 2ULL; + mdapi_data->SliceFrequency = (result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL; + mdapi_data->UnsliceFrequency = (result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL; return sizeof(*mdapi_data); } |