diff options
author | Lionel Landwerlin <[email protected]> | 2018-03-06 17:09:21 +0000 |
---|---|---|
committer | Lionel Landwerlin <[email protected]> | 2018-04-23 18:30:10 +0100 |
commit | d71b442416be8ddbed93347d6a2f14a1294bec77 (patch) | |
tree | 1614bc55d5141722fd878a5ef5dcd9f3d51c2632 | |
parent | e37e6435895b27024e857f3b12269578613bd920 (diff) |
i965: perf: extract utility functions
We would like to reuse a number of the functions and structures in
another file in a future commit.
We also move the previous content of brw_performance_query.h into
brw_performance_query_metrics.h to be included by generated metrics
files.
Signed-off-by: Lionel Landwerlin <[email protected]>
Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.sources | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_oa.py | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_performance_query.c | 299 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_performance_query.h | 186 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_performance_query_metrics.h | 57 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/meson.build | 1 |
6 files changed, 294 insertions, 252 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 3479ceb9d16..31ecbe6d30e 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -37,6 +37,7 @@ i965_FILES = \ brw_pipe_control.h \ brw_performance_query.h \ brw_performance_query.c \ + brw_performance_query_metrics.h \ brw_program.c \ brw_program.h \ brw_program_binary.c \ diff --git a/src/mesa/drivers/dri/i965/brw_oa.py b/src/mesa/drivers/dri/i965/brw_oa.py index 7bf7987b4ca..4c70f253d73 100644 --- a/src/mesa/drivers/dri/i965/brw_oa.py +++ b/src/mesa/drivers/dri/i965/brw_oa.py @@ -629,7 +629,7 @@ def main(): c(textwrap.dedent("""\ #include "brw_context.h" - #include "brw_performance_query.h" + #include "brw_performance_query_metrics.h" #define MIN(a, b) ((a < b) ? (a) : (b)) diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index ef5401a21b4..44cac85c6e6 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -76,15 +76,6 @@ #define FILE_DEBUG_FLAG DEBUG_PERFMON -/* - * The largest OA formats we can use include: - * For Haswell: - * 1 timestamp, 45 A counters, 8 B counters and 8 C counters. - * For Gen8+ - * 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters - */ -#define MAX_OA_REPORT_COUNTERS 62 - #define OAREPORT_REASON_MASK 0x3f #define OAREPORT_REASON_SHIFT 19 #define OAREPORT_REASON_TIMER (1<<0) @@ -216,85 +207,6 @@ struct brw_oa_sample_buf { uint32_t last_timestamp; }; -/** - * i965 representation of a performance query object. - * - * NB: We want to keep this structure relatively lean considering that - * applications may expect to allocate enough objects to be able to - * query around all draw calls in a frame. - */ -struct brw_perf_query_object -{ - struct gl_perf_query_object base; - - const struct brw_perf_query_info *query; - - /* See query->kind to know which state below is in use... */ - union { - struct { - - /** - * BO containing OA counter snapshots at query Begin/End time. - */ - struct brw_bo *bo; - - /** - * Address of mapped of @bo - */ - void *map; - - /** - * The MI_REPORT_PERF_COUNT command lets us specify a unique - * ID that will be reflected in the resulting OA report - * that's written by the GPU. This is the ID we're expecting - * in the begin report and the the end report should be - * @begin_report_id + 1. - */ - int begin_report_id; - - /** - * Reference the head of the brw->perfquery.sample_buffers - * list at the time that the query started (so we only need - * to look at nodes after this point when looking for samples - * related to this query) - * - * (See struct brw_oa_sample_buf description for more details) - */ - struct exec_node *samples_head; - - /** - * Storage for the final accumulated OA counters. - */ - uint64_t accumulator[MAX_OA_REPORT_COUNTERS]; - - /** - * Hw ID used by the context on which the query was running. - */ - uint32_t hw_id; - - /** - * false while in the unaccumulated_elements list, and set to - * true when the final, end MI_RPC snapshot has been - * accumulated. - */ - bool results_accumulated; - - /** - * Number of reports accumulated to produce the results. - */ - uint32_t reports_accumulated; - } oa; - - struct { - /** - * BO containing starting and ending snapshots for the - * statistics counters. - */ - struct brw_bo *bo; - } pipeline_stats; - }; -}; - /** Downcasting convenience macro. */ static inline struct brw_perf_query_object * brw_perf_query(struct gl_perf_query_object *o) @@ -302,10 +214,6 @@ brw_perf_query(struct gl_perf_query_object *o) return (struct brw_perf_query_object *) o; } -#define STATS_BO_SIZE 4096 -#define STATS_BO_END_OFFSET_BYTES (STATS_BO_SIZE / 2) -#define MAX_STAT_COUNTERS (STATS_BO_END_OFFSET_BYTES / 8) - #define MI_RPC_BO_SIZE 4096 #define MI_RPC_BO_END_OFFSET_BYTES (MI_RPC_BO_SIZE / 2) @@ -606,36 +514,6 @@ drop_from_unaccumulated_query_list(struct brw_context *brw, reap_old_sample_buffers(brw); } -static void -accumulate_uint32(const uint32_t *report0, - const uint32_t *report1, - uint64_t *accumulator) -{ - *accumulator += (uint32_t)(*report1 - *report0); -} - -static void -accumulate_uint40(int a_index, - const uint32_t *report0, - const uint32_t *report1, - uint64_t *accumulator) -{ - const uint8_t *high_bytes0 = (uint8_t *)(report0 + 40); - const uint8_t *high_bytes1 = (uint8_t *)(report1 + 40); - uint64_t high0 = (uint64_t)(high_bytes0[a_index]) << 32; - uint64_t high1 = (uint64_t)(high_bytes1[a_index]) << 32; - uint64_t value0 = report0[a_index + 4] | high0; - uint64_t value1 = report1[a_index + 4] | high1; - uint64_t delta; - - if (value0 > value1) - delta = (1ULL << 40) + value1 - value0; - else - delta = value1 - value0; - - *accumulator += delta; -} - /** * Given pointers to starting and ending OA snapshots, add the deltas for each * counter to the results. @@ -655,27 +533,29 @@ add_deltas(struct brw_context *brw, switch (query->oa_format) { case I915_OA_FORMAT_A32u40_A4u32_B8_C8: - accumulate_uint32(start + 1, end + 1, accumulator + idx++); /* timestamp */ - accumulate_uint32(start + 3, end + 3, accumulator + idx++); /* clock */ + brw_perf_query_accumulate_uint32(start + 1, end + 1, accumulator + idx++); /* timestamp */ + brw_perf_query_accumulate_uint32(start + 3, end + 3, accumulator + idx++); /* clock */ /* 32x 40bit A counters... */ for (i = 0; i < 32; i++) - accumulate_uint40(i, start, end, accumulator + idx++); + brw_perf_query_accumulate_uint40(i, start, end, accumulator + idx++); /* 4x 32bit A counters... */ for (i = 0; i < 4; i++) - accumulate_uint32(start + 36 + i, end + 36 + i, accumulator + idx++); + brw_perf_query_accumulate_uint32(start + 36 + i, end + 36 + i, + accumulator + idx++); /* 8x 32bit B counters + 8x 32bit C counters... */ for (i = 0; i < 16; i++) - accumulate_uint32(start + 48 + i, end + 48 + i, accumulator + idx++); + brw_perf_query_accumulate_uint32(start + 48 + i, end + 48 + i, + accumulator + idx++); break; case I915_OA_FORMAT_A45_B8_C8: - accumulate_uint32(start + 1, end + 1, accumulator); /* timestamp */ + brw_perf_query_accumulate_uint32(start + 1, end + 1, accumulator); /* timestamp */ for (i = 0; i < 61; i++) - accumulate_uint32(start + 3 + i, end + 3 + i, accumulator + 1 + i); + brw_perf_query_accumulate_uint32(start + 3 + i, end + 3 + i, accumulator + 1 + i); break; default: @@ -1643,54 +1523,11 @@ brw_delete_perf_query(struct gl_context *ctx, /******************************************************************************/ -static struct brw_perf_query_info * -append_query_info(struct brw_context *brw) -{ - brw->perfquery.queries = - reralloc(brw, brw->perfquery.queries, - struct brw_perf_query_info, ++brw->perfquery.n_queries); - - return &brw->perfquery.queries[brw->perfquery.n_queries - 1]; -} - -static void -add_stat_reg(struct brw_perf_query_info *query, - uint32_t reg, - uint32_t numerator, - uint32_t denominator, - const char *name, - const char *description) -{ - struct brw_perf_query_counter *counter; - - assert(query->n_counters < MAX_STAT_COUNTERS); - - counter = &query->counters[query->n_counters]; - counter->name = name; - counter->desc = description; - counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL; - counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL; - counter->size = sizeof(uint64_t); - counter->offset = sizeof(uint64_t) * query->n_counters; - counter->pipeline_stat.reg = reg; - counter->pipeline_stat.numerator = numerator; - counter->pipeline_stat.denominator = denominator; - - query->n_counters++; -} - -static void -add_basic_stat_reg(struct brw_perf_query_info *query, - uint32_t reg, const char *name) -{ - add_stat_reg(query, reg, 1, 1, name, name); -} - static void init_pipeline_statistic_query_registers(struct brw_context *brw) { const struct gen_device_info *devinfo = &brw->screen->devinfo; - struct brw_perf_query_info *query = append_query_info(brw); + struct brw_perf_query_info *query = brw_perf_query_append_query_info(brw); query->kind = PIPELINE_STATS; query->name = "Pipeline Statistics Registers"; @@ -1698,75 +1535,75 @@ init_pipeline_statistic_query_registers(struct brw_context *brw) query->counters = rzalloc_array(brw, struct brw_perf_query_counter, MAX_STAT_COUNTERS); - add_basic_stat_reg(query, IA_VERTICES_COUNT, - "N vertices submitted"); - add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, - "N primitives submitted"); - add_basic_stat_reg(query, VS_INVOCATION_COUNT, - "N vertex shader invocations"); + brw_perf_query_info_add_basic_stat_reg(query, IA_VERTICES_COUNT, + "N vertices submitted"); + brw_perf_query_info_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT, + "N primitives submitted"); + brw_perf_query_info_add_basic_stat_reg(query, VS_INVOCATION_COUNT, + "N vertex shader invocations"); if (devinfo->gen == 6) { - add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1, - "SO_PRIM_STORAGE_NEEDED", - "N geometry shader stream-out primitives (total)"); - add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1, - "SO_NUM_PRIMS_WRITTEN", - "N geometry shader stream-out primitives (written)"); + brw_perf_query_info_add_stat_reg(query, GEN6_SO_PRIM_STORAGE_NEEDED, 1, 1, + "SO_PRIM_STORAGE_NEEDED", + "N geometry shader stream-out primitives (total)"); + brw_perf_query_info_add_stat_reg(query, GEN6_SO_NUM_PRIMS_WRITTEN, 1, 1, + "SO_NUM_PRIMS_WRITTEN", + "N geometry shader stream-out primitives (written)"); } else { - add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1, - "SO_PRIM_STORAGE_NEEDED (Stream 0)", - "N stream-out (stream 0) primitives (total)"); - add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1, - "SO_PRIM_STORAGE_NEEDED (Stream 1)", - "N stream-out (stream 1) primitives (total)"); - add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1, - "SO_PRIM_STORAGE_NEEDED (Stream 2)", - "N stream-out (stream 2) primitives (total)"); - add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1, - "SO_PRIM_STORAGE_NEEDED (Stream 3)", - "N stream-out (stream 3) primitives (total)"); - add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1, - "SO_NUM_PRIMS_WRITTEN (Stream 0)", - "N stream-out (stream 0) primitives (written)"); - add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1, - "SO_NUM_PRIMS_WRITTEN (Stream 1)", - "N stream-out (stream 1) primitives (written)"); - add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1, - "SO_NUM_PRIMS_WRITTEN (Stream 2)", - "N stream-out (stream 2) primitives (written)"); - add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1, - "SO_NUM_PRIMS_WRITTEN (Stream 3)", - "N stream-out (stream 3) primitives (written)"); + brw_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(0), 1, 1, + "SO_PRIM_STORAGE_NEEDED (Stream 0)", + "N stream-out (stream 0) primitives (total)"); + brw_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(1), 1, 1, + "SO_PRIM_STORAGE_NEEDED (Stream 1)", + "N stream-out (stream 1) primitives (total)"); + brw_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(2), 1, 1, + "SO_PRIM_STORAGE_NEEDED (Stream 2)", + "N stream-out (stream 2) primitives (total)"); + brw_perf_query_info_add_stat_reg(query, GEN7_SO_PRIM_STORAGE_NEEDED(3), 1, 1, + "SO_PRIM_STORAGE_NEEDED (Stream 3)", + "N stream-out (stream 3) primitives (total)"); + brw_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(0), 1, 1, + "SO_NUM_PRIMS_WRITTEN (Stream 0)", + "N stream-out (stream 0) primitives (written)"); + brw_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(1), 1, 1, + "SO_NUM_PRIMS_WRITTEN (Stream 1)", + "N stream-out (stream 1) primitives (written)"); + brw_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(2), 1, 1, + "SO_NUM_PRIMS_WRITTEN (Stream 2)", + "N stream-out (stream 2) primitives (written)"); + brw_perf_query_info_add_stat_reg(query, GEN7_SO_NUM_PRIMS_WRITTEN(3), 1, 1, + "SO_NUM_PRIMS_WRITTEN (Stream 3)", + "N stream-out (stream 3) primitives (written)"); } - add_basic_stat_reg(query, HS_INVOCATION_COUNT, - "N TCS shader invocations"); - add_basic_stat_reg(query, DS_INVOCATION_COUNT, - "N TES shader invocations"); + brw_perf_query_info_add_basic_stat_reg(query, HS_INVOCATION_COUNT, + "N TCS shader invocations"); + brw_perf_query_info_add_basic_stat_reg(query, DS_INVOCATION_COUNT, + "N TES shader invocations"); - add_basic_stat_reg(query, GS_INVOCATION_COUNT, - "N geometry shader invocations"); - add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, - "N geometry shader primitives emitted"); + brw_perf_query_info_add_basic_stat_reg(query, GS_INVOCATION_COUNT, + "N geometry shader invocations"); + brw_perf_query_info_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT, + "N geometry shader primitives emitted"); - add_basic_stat_reg(query, CL_INVOCATION_COUNT, - "N primitives entering clipping"); - add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, - "N primitives leaving clipping"); + brw_perf_query_info_add_basic_stat_reg(query, CL_INVOCATION_COUNT, + "N primitives entering clipping"); + brw_perf_query_info_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT, + "N primitives leaving clipping"); if (devinfo->is_haswell || devinfo->gen == 8) - add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, - "N fragment shader invocations", - "N fragment shader invocations"); + brw_perf_query_info_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4, + "N fragment shader invocations", + "N fragment shader invocations"); else - add_basic_stat_reg(query, PS_INVOCATION_COUNT, - "N fragment shader invocations"); + brw_perf_query_info_add_basic_stat_reg(query, PS_INVOCATION_COUNT, + "N fragment shader invocations"); - add_basic_stat_reg(query, PS_DEPTH_COUNT, "N z-pass fragments"); + brw_perf_query_info_add_basic_stat_reg(query, PS_DEPTH_COUNT, "N z-pass fragments"); if (devinfo->gen >= 7) - add_basic_stat_reg(query, CS_INVOCATION_COUNT, - "N compute shader invocations"); + brw_perf_query_info_add_basic_stat_reg(query, CS_INVOCATION_COUNT, + "N compute shader invocations"); query->data_size = sizeof(uint64_t) * query->n_counters; } @@ -1776,7 +1613,9 @@ register_oa_config(struct brw_context *brw, const struct brw_perf_query_info *query, uint64_t config_id) { - struct brw_perf_query_info *registred_query = append_query_info(brw); + struct brw_perf_query_info *registred_query = + brw_perf_query_append_query_info(brw); + *registred_query = *query; registred_query->oa_metrics_set_id = config_id; DBG("metric set registred: id = %" PRIu64", guid = %s\n", diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.h b/src/mesa/drivers/dri/i965/brw_performance_query.h index 11938b74df3..f62786f7f1c 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.h +++ b/src/mesa/drivers/dri/i965/brw_performance_query.h @@ -27,33 +27,177 @@ #include <stdint.h> #include "brw_context.h" +#include "brw_performance_query_metrics.h" -struct brw_pipeline_stat -{ - uint32_t reg; - uint32_t numerator; - uint32_t denominator; -}; +/* + * When currently allocate only one page for pipeline statistics queries. Here + * we derived the maximum number of counters for that amount. + */ +#define STATS_BO_SIZE 4096 +#define STATS_BO_END_OFFSET_BYTES (STATS_BO_SIZE / 2) +#define MAX_STAT_COUNTERS (STATS_BO_END_OFFSET_BYTES / 8) -struct brw_perf_query_counter +/* + * The largest OA formats we can use include: + * For Haswell: + * 1 timestamp, 45 A counters, 8 B counters and 8 C counters. + * For Gen8+ + * 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters + */ +#define MAX_OA_REPORT_COUNTERS 62 + +/** + * i965 representation of a performance query object. + * + * NB: We want to keep this structure relatively lean considering that + * applications may expect to allocate enough objects to be able to + * query around all draw calls in a frame. + */ +struct brw_perf_query_object { - const char *name; - const char *desc; - GLenum type; - GLenum data_type; - uint64_t raw_max; - size_t offset; - size_t size; + struct gl_perf_query_object base; + const struct brw_perf_query_info *query; + + /* See query->kind to know which state below is in use... */ union { - uint64_t (*oa_counter_read_uint64)(struct brw_context *brw, - const struct brw_perf_query_info *query, - uint64_t *accumulator); - float (*oa_counter_read_float)(struct brw_context *brw, - const struct brw_perf_query_info *query, - uint64_t *accumulator); - struct brw_pipeline_stat pipeline_stat; + struct { + + /** + * BO containing OA counter snapshots at query Begin/End time. + */ + struct brw_bo *bo; + + /** + * Address of mapped of @bo + */ + void *map; + + /** + * The MI_REPORT_PERF_COUNT command lets us specify a unique + * ID that will be reflected in the resulting OA report + * that's written by the GPU. This is the ID we're expecting + * in the begin report and the the end report should be + * @begin_report_id + 1. + */ + int begin_report_id; + + /** + * Reference the head of the brw->perfquery.sample_buffers + * list at the time that the query started (so we only need + * to look at nodes after this point when looking for samples + * related to this query) + * + * (See struct brw_oa_sample_buf description for more details) + */ + struct exec_node *samples_head; + + /** + * Storage for the final accumulated OA counters. + */ + uint64_t accumulator[MAX_OA_REPORT_COUNTERS]; + + /** + * Hw ID used by the context on which the query was running. + */ + uint32_t hw_id; + + /** + * false while in the unaccumulated_elements list, and set to + * true when the final, end MI_RPC snapshot has been + * accumulated. + */ + bool results_accumulated; + + /** + * Number of reports accumulated to produce the results. + */ + uint32_t reports_accumulated; + } oa; + + struct { + /** + * BO containing starting and ending snapshots for the + * statistics counters. + */ + struct brw_bo *bo; + } pipeline_stats; }; }; +static inline struct brw_perf_query_info * +brw_perf_query_append_query_info(struct brw_context *brw) +{ + brw->perfquery.queries = + reralloc(brw, brw->perfquery.queries, + struct brw_perf_query_info, ++brw->perfquery.n_queries); + + return &brw->perfquery.queries[brw->perfquery.n_queries - 1]; +} + +static inline void +brw_perf_query_info_add_stat_reg(struct brw_perf_query_info *query, + uint32_t reg, + uint32_t numerator, + uint32_t denominator, + const char *name, + const char *description) +{ + struct brw_perf_query_counter *counter; + + assert(query->n_counters < MAX_STAT_COUNTERS); + + counter = &query->counters[query->n_counters]; + counter->name = name; + counter->desc = description; + counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL; + counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL; + counter->size = sizeof(uint64_t); + counter->offset = sizeof(uint64_t) * query->n_counters; + counter->pipeline_stat.reg = reg; + counter->pipeline_stat.numerator = numerator; + counter->pipeline_stat.denominator = denominator; + + query->n_counters++; +} + +static inline void +brw_perf_query_info_add_basic_stat_reg(struct brw_perf_query_info *query, + uint32_t reg, const char *name) +{ + brw_perf_query_info_add_stat_reg(query, reg, 1, 1, name, name); +} + +/* Accumulate 32bits OA counters */ +static inline void +brw_perf_query_accumulate_uint32(const uint32_t *report0, + const uint32_t *report1, + uint64_t *accumulator) +{ + *accumulator += (uint32_t)(*report1 - *report0); +} + +/* Accumulate 40bits OA counters */ +static inline void +brw_perf_query_accumulate_uint40(int a_index, + const uint32_t *report0, + const uint32_t *report1, + uint64_t *accumulator) +{ + const uint8_t *high_bytes0 = (uint8_t *)(report0 + 40); + const uint8_t *high_bytes1 = (uint8_t *)(report1 + 40); + uint64_t high0 = (uint64_t)(high_bytes0[a_index]) << 32; + uint64_t high1 = (uint64_t)(high_bytes1[a_index]) << 32; + uint64_t value0 = report0[a_index + 4] | high0; + uint64_t value1 = report1[a_index + 4] | high1; + uint64_t delta; + + if (value0 > value1) + delta = (1ULL << 40) + value1 - value0; + else + delta = value1 - value0; + + *accumulator += delta; +} + #endif /* BRW_PERFORMANCE_QUERY_H */ diff --git a/src/mesa/drivers/dri/i965/brw_performance_query_metrics.h b/src/mesa/drivers/dri/i965/brw_performance_query_metrics.h new file mode 100644 index 00000000000..80d7ddc07cf --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_performance_query_metrics.h @@ -0,0 +1,57 @@ +/* + * Copyright © 2018 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BRW_PERFORMANCE_QUERY_METRICS_H +#define BRW_PERFORMANCE_QUERY_METRICS_H + +#include <stdint.h> + +struct brw_pipeline_stat +{ + uint32_t reg; + uint32_t numerator; + uint32_t denominator; +}; + +struct brw_perf_query_counter +{ + const char *name; + const char *desc; + GLenum type; + GLenum data_type; + uint64_t raw_max; + size_t offset; + size_t size; + + union { + uint64_t (*oa_counter_read_uint64)(struct brw_context *brw, + const struct brw_perf_query_info *query, + uint64_t *accumulator); + float (*oa_counter_read_float)(struct brw_context *brw, + const struct brw_perf_query_info *query, + uint64_t *accumulator); + struct brw_pipeline_stat pipeline_stat; + }; +}; + +#endif /* BRW_PERFORMANCE_QUERY_METRICS_H */ diff --git a/src/mesa/drivers/dri/i965/meson.build b/src/mesa/drivers/dri/i965/meson.build index 4d8280df573..a73ca7807fe 100644 --- a/src/mesa/drivers/dri/i965/meson.build +++ b/src/mesa/drivers/dri/i965/meson.build @@ -56,6 +56,7 @@ files_i965 = files( 'brw_pipe_control.c', 'brw_performance_query.h', 'brw_performance_query.c', + 'brw_performance_query_metrics.h', 'brw_program.c', 'brw_program.h', 'brw_program_binary.c', |