summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers
diff options
context:
space:
mode:
author: Lionel Landwerlin <[email protected]> 2017-03-30 15:46:40 +0100
committer: Lionel Landwerlin <[email protected]> 2017-06-27 14:10:29 +0300
commit: 31b11f69f75ff92cb42a13bb2f6740c183f761df (patch)
tree: 31ee091210f7ff0a61a4f2d713c2ec6f6e20c086 /src/mesa/drivers
parent: 1fc7b951278428bd0fbbe040226737a44742f353 (diff)
i965: perf: keep on reading reports until delimiting timestamp
Due to an underlying hardware race condition, we have no guarantee that all the reports coming from the OA buffer related to the workload we're trying to measure have landed in memory by the time all the submitted work has completed. That means we need to keep reading the OA stream until we read a report with a timestamp more recent than the timestamp recorded by the MI_REPORT_PERF_COUNT at the end of the performance query. v2: initialize the previously uninitialized offset variable to 0 (Lionel) v3: rework the reading to avoid blocking the user of the API unless requested (Rob) v4: fix a bug that made the i965 driver read the perf stream when not necessary, leading to very long counter accumulation times (Lionel) Signed-off-by: Lionel Landwerlin <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_performance_query.c 133
1 files changed, 113 insertions, 20 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c
index dd392b1960c..4af06185680 100644
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -219,6 +219,7 @@ struct brw_oa_sample_buf {
int refcount;
int len;
uint8_t buf[I915_PERF_OA_SAMPLE_SIZE * 10];
+ uint32_t last_timestamp;
};
/**
@@ -244,6 +245,11 @@ struct brw_perf_query_object
struct brw_bo *bo;
/**
+ * Address of mapped of @bo
+ */
+ void *map;
+
+ /**
* The MI_REPORT_PERF_COUNT command lets us specify a unique
* ID that will be reflected in the resulting OA report
* that's written by the GPU. This is the ID we're expecting
@@ -681,11 +687,26 @@ discard_all_queries(struct brw_context *brw)
}
}
-static bool
-read_oa_samples(struct brw_context *brw)
+enum OaReadStatus {
+ OA_READ_STATUS_ERROR,
+ OA_READ_STATUS_UNFINISHED,
+ OA_READ_STATUS_FINISHED,
+};
+
+static enum OaReadStatus
+read_oa_samples_until(struct brw_context *brw,
+ uint32_t start_timestamp,
+ uint32_t end_timestamp)
{
+ struct exec_node *tail_node =
+ exec_list_get_tail(&brw->perfquery.sample_buffers);
+ struct brw_oa_sample_buf *tail_buf =
+ exec_node_data(struct brw_oa_sample_buf, tail_node, link);
+ uint32_t last_timestamp = tail_buf->last_timestamp;
+
while (1) {
struct brw_oa_sample_buf *buf = get_free_sample_buf(brw);
+ uint32_t offset;
int len;
while ((len = read(brw->perfquery.oa_stream_fd, buf->buf,
@@ -697,28 +718,94 @@ read_oa_samples(struct brw_context *brw)
if (len < 0) {
if (errno == EAGAIN)
- return true;
+ return ((last_timestamp - start_timestamp) >=
+ (end_timestamp - start_timestamp)) ?
+ OA_READ_STATUS_FINISHED :
+ OA_READ_STATUS_UNFINISHED;
else {
DBG("Error reading i915 perf samples: %m\n");
- return false;
}
- } else {
+ } else
DBG("Spurious EOF reading i915 perf samples\n");
- return false;
- }
+
+ return OA_READ_STATUS_ERROR;
}
buf->len = len;
exec_list_push_tail(&brw->perfquery.sample_buffers, &buf->link);
+
+ /* Go through the reports and update the last timestamp. */
+ offset = 0;
+ while (offset < buf->len) {
+ const struct drm_i915_perf_record_header *header =
+ (const struct drm_i915_perf_record_header *) &buf->buf[offset];
+ uint32_t *report = (uint32_t *) (header + 1);
+
+ if (header->type == DRM_I915_PERF_RECORD_SAMPLE)
+ last_timestamp = report[1];
+
+ offset += header->size;
+ }
+
+ buf->last_timestamp = last_timestamp;
}
unreachable("not reached");
+ return OA_READ_STATUS_ERROR;
+}
+
+/**
+ * Try to read all the reports until either the delimiting timestamp
+ * or an error arises.
+ */
+static bool
+read_oa_samples_for_query(struct brw_context *brw,
+ struct brw_perf_query_object *obj)
+{
+ uint32_t *start;
+ uint32_t *last;
+ uint32_t *end;
+
+ /* We need the MI_REPORT_PERF_COUNT to land before we can start
+ * accumulate. */
+ assert(!brw_batch_references(&brw->batch, obj->oa.bo) &&
+ !brw_bo_busy(obj->oa.bo));
+
+ /* Map the BO once here and let accumulate_oa_reports() unmap
+ * it. */
+ if (obj->oa.map == NULL)
+ obj->oa.map = brw_bo_map(brw, obj->oa.bo, MAP_READ);
+
+ start = last = obj->oa.map;
+ end = obj->oa.map + MI_RPC_BO_END_OFFSET_BYTES;
+
+ if (start[0] != obj->oa.begin_report_id) {
+ DBG("Spurious start report id=%"PRIu32"\n", start[0]);
+ return true;
+ }
+ if (end[0] != (obj->oa.begin_report_id + 1)) {
+ DBG("Spurious end report id=%"PRIu32"\n", end[0]);
+ return true;
+ }
+
+ /* Read the reports until the end timestamp. */
+ switch (read_oa_samples_until(brw, start[1], end[1])) {
+ case OA_READ_STATUS_ERROR:
+ /* Fallthrough and let accumulate_oa_reports() deal with the
+ * error. */
+ case OA_READ_STATUS_FINISHED:
+ return true;
+ case OA_READ_STATUS_UNFINISHED:
+ return false;
+ }
+
+ unreachable("invalid read status");
return false;
}
/**
- * Accumulate raw OA counter values based on deltas between pairs
- * of OA reports.
+ * Accumulate raw OA counter values based on deltas between pairs of
+ * OA reports.
*
* Accumulation starts from the first report captured via
* MI_REPORT_PERF_COUNT (MI_RPC) by brw_begin_perf_query() until the
@@ -739,7 +826,6 @@ accumulate_oa_reports(struct brw_context *brw,
struct brw_perf_query_object *obj)
{
struct gl_perf_query_object *o = &obj->base;
- uint32_t *query_buffer;
uint32_t *start;
uint32_t *last;
uint32_t *end;
@@ -748,15 +834,10 @@ accumulate_oa_reports(struct brw_context *brw,
uint32_t ctx_id;
assert(o->Ready);
+ assert(obj->oa.map != NULL);
- /* Collect the latest periodic OA reports from i915 perf */
- if (!read_oa_samples(brw))
- goto error;
-
- query_buffer = brw_bo_map(brw, obj->oa.bo, MAP_READ);
-
- start = last = query_buffer;
- end = query_buffer + (MI_RPC_BO_END_OFFSET_BYTES / sizeof(uint32_t));
+ start = last = obj->oa.map;
+ end = obj->oa.map + MI_RPC_BO_END_OFFSET_BYTES;
if (start[0] != obj->oa.begin_report_id) {
DBG("Spurious start report id=%"PRIu32"\n", start[0]);
@@ -864,6 +945,7 @@ end:
DBG("Marking %d accumulated - results gathered\n", o->Id);
brw_bo_unmap(obj->oa.bo);
+ obj->oa.map = NULL;
obj->oa.results_accumulated = true;
drop_from_unaccumulated_query_list(brw, obj);
dec_n_oa_users(brw);
@@ -873,6 +955,7 @@ end:
error:
brw_bo_unmap(obj->oa.bo);
+ obj->oa.map = NULL;
discard_all_queries(brw);
}
@@ -1249,6 +1332,16 @@ brw_wait_perf_query(struct gl_context *ctx, struct gl_perf_query_object *o)
intel_batchbuffer_flush(brw);
brw_bo_wait_rendering(brw, bo);
+
+ /* Due to a race condition between the OA unit signaling report
+ * availability and the report actually being written into memory,
+ * we need to wait for all the reports to come in before we can
+ * read them.
+ */
+ if (obj->query->kind == OA_COUNTERS) {
+ while (!read_oa_samples_for_query(brw, obj))
+ ;
+ }
}
static bool
@@ -1266,8 +1359,8 @@ brw_is_perf_query_ready(struct gl_context *ctx,
return (obj->oa.results_accumulated ||
(obj->oa.bo &&
!brw_batch_references(&brw->batch, obj->oa.bo) &&
- !brw_bo_busy(obj->oa.bo)));
-
+ !brw_bo_busy(obj->oa.bo) &&
+ read_oa_samples_for_query(brw, obj)));
case PIPELINE_STATS:
return (obj->pipeline_stats.bo &&
!brw_batch_references(&brw->batch, obj->pipeline_stats.bo) &&