summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/radeon
diff options
context:
space:
mode:
authorNicolai Hähnle <[email protected]>2015-11-25 15:30:03 +0100
committerNicolai Hähnle <[email protected]>2015-11-25 15:52:09 +0100
commitad22006892c5511dac7d0d680633a1b857da49fb (patch)
tree46c64acd6d362d3ca3060787f1549e850561cb5d /src/gallium/drivers/radeon
parentb9fc01aee75dcc2d56750ea430e32d74127faf69 (diff)
radeonsi: implement AMD_performance_monitor for CIK+
Expose most of the performance counter groups that are exposed by Catalyst. Ideally, the driver will work with GPUPerfStudio at some point, but we are not quite there yet. In any case, this is the reason for grouping multiple instances of hardware blocks in the way it is implemented. The counters can also be shown using the Gallium HUD. If one is interested to see how work is distributed across multiple shader engines, one can set the environment variable RADEON_PC_SEPARATE_SE=1 to obtain finer-grained performance counter groups. Part of the implementation is in radeon because an implementation for older hardware would largely follow along the same lines, but exposing a different set of blocks which are programmed slightly differently. Reviewed-by: Marek Olšák <[email protected]>
Diffstat (limited to 'src/gallium/drivers/radeon')
-rw-r--r--src/gallium/drivers/radeon/Makefile.sources1
-rw-r--r--src/gallium/drivers/radeon/r600_perfcounter.c636
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.c1
-rw-r--r--src/gallium/drivers/radeon/r600_pipe_common.h7
-rw-r--r--src/gallium/drivers/radeon/r600_query.c21
-rw-r--r--src/gallium/drivers/radeon/r600_query.h121
6 files changed, 784 insertions, 3 deletions
diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources
index 1dbad2f39e3..eb171f7da5f 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -4,6 +4,7 @@ C_SOURCES := \
r600_cs.h \
r600d_common.h \
r600_gpu_load.c \
+ r600_perfcounter.c \
r600_pipe_common.c \
r600_pipe_common.h \
r600_query.c \
diff --git a/src/gallium/drivers/radeon/r600_perfcounter.c b/src/gallium/drivers/radeon/r600_perfcounter.c
new file mode 100644
index 00000000000..a710c042b27
--- /dev/null
+++ b/src/gallium/drivers/radeon/r600_perfcounter.c
@@ -0,0 +1,636 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Nicolai Hähnle <[email protected]>
+ *
+ */
+
+#include "util/u_memory.h"
+#include "r600_query.h"
+#include "r600_pipe_common.h"
+#include "r600d_common.h"
+
+/* Max counters per HW block; this is the capacity of
+ * r600_pc_group::selectors and is asserted against in
+ * r600_perfcounters_add_block. */
+#define R600_QUERY_MAX_COUNTERS 16
+
+static const char * const r600_pc_shader_suffix[] = { /* group-name suffix, indexed by shader group id */
+ "", "_PS", "_VS", "_GS", "_ES", "_HS", "_LS", "_CS" /* id 0 = all shaders; id i > 0 = shader bit 1 << (i - 1) */
+};
+
+/*
+ * Translate a flat counter index into the block that owns it.
+ *
+ * Counters are numbered block by block; every block contributes
+ * num_groups * num_selectors consecutive indices.
+ *
+ * \param pc         perfcounter description to search
+ * \param index      flat counter index
+ * \param base_gid   receives the number of groups in the blocks preceding
+ *                   the returned one (the total group count on failure)
+ * \param sub_index  receives the index relative to the owning block; only
+ *                   written when a block is found
+ * \return the owning block, or NULL when index is out of range
+ */
+static struct r600_perfcounter_block *
+lookup_counter(struct r600_perfcounters *pc, unsigned index,
+	       unsigned *base_gid, unsigned *sub_index)
+{
+	unsigned groups_before = 0;
+	unsigned remaining = index;
+	unsigned i;
+
+	for (i = 0; i < pc->num_blocks; i++) {
+		struct r600_perfcounter_block *candidate = &pc->blocks[i];
+		unsigned span = candidate->num_groups * candidate->num_selectors;
+
+		if (remaining < span) {
+			*base_gid = groups_before;
+			*sub_index = remaining;
+			return candidate;
+		}
+
+		/* Not in this block: skip its counters and its groups. */
+		remaining -= span;
+		groups_before += candidate->num_groups;
+	}
+
+	*base_gid = groups_before;
+	return NULL;
+}
+
+/*
+ * Translate a flat group index into the block that owns the group.
+ *
+ * \param pc     perfcounter description to search
+ * \param index  in: flat group index; out: group index relative to the
+ *               returned block (reduced by the group count of every block
+ *               that was skipped, also when the lookup fails)
+ * \return the owning block, or NULL when the index is out of range
+ */
+static struct r600_perfcounter_block *
+lookup_group(struct r600_perfcounters *pc, unsigned *index)
+{
+	unsigned i;
+
+	for (i = 0; i < pc->num_blocks; i++) {
+		struct r600_perfcounter_block *candidate = &pc->blocks[i];
+
+		if (*index < candidate->num_groups)
+			return candidate;
+
+		*index -= candidate->num_groups;
+	}
+
+	return NULL;
+}
+
+struct r600_pc_group { /* counters selected from one perfcounter group of a batch query */
+ struct r600_pc_group *next; /* next group of the same query (singly linked list) */
+ struct r600_perfcounter_block *block; /* hardware block the group belongs to */
+ unsigned sub_gid; /* group index within the block; only used during init */
+ unsigned result_base; /* dword index of the group's first result; only used during init */
+ int se; /* shader engine of the group, or -1 when the group is not per-SE */
+ int instance; /* block instance of the group, or -1 when the group is not per-instance */
+ unsigned num_counters; /* number of valid entries in selectors[] */
+ unsigned selectors[R600_QUERY_MAX_COUNTERS]; /* selector index within the block, one per selected counter */
+};
+
+struct r600_pc_counter { /* where the samples of one user-requested counter live in the result buffer */
+ unsigned base; /* dword index of the first sample; ~0 makes add_result skip the counter */
+ unsigned dwords; /* number of samples that are summed into the result */
+ unsigned stride; /* distance, in dwords, between consecutive samples */
+};
+
+struct r600_query_pc { /* batch query over performance counters */
+ struct r600_query_hw b; /* base object; kept first because the code casts between the two types */
+
+ unsigned shaders; /* R600_PC_SHADER_* mask handed to emit_shaders; 0 = emit nothing */
+ unsigned num_counters; /* number of user-requested counters, i.e. entries in counters[] */
+ struct r600_pc_counter *counters; /* result mapping, one entry per user-requested counter */
+ struct r600_pc_group *groups; /* head of the list of groups used by this query */
+};
+
+/*
+ * Destroy a perfcounter batch query: release the group list and the counter
+ * mapping, then hand the object to the generic hardware-query destructor.
+ */
+static void r600_pc_query_destroy(struct r600_common_context *ctx,
+				  struct r600_query *rquery)
+{
+	struct r600_query_pc *pcq = (struct r600_query_pc *)rquery;
+	struct r600_pc_group *cur, *following;
+
+	for (cur = pcq->groups; cur; cur = following) {
+		/* Grab the successor before the node goes away. */
+		following = cur->next;
+		FREE(cur);
+	}
+	pcq->groups = NULL;
+
+	FREE(pcq->counters);
+
+	r600_query_hw_destroy(ctx, rquery);
+}
+
+/*
+ * Emit the commands that program and start the counters of a batch query.
+ *
+ * The shader mask is emitted first (if any), then the selectors of every
+ * group. The SE/instance target is only re-emitted when it differs from the
+ * previous group's, and it is reset to (-1, -1) before counting starts.
+ */
+static void r600_pc_query_emit_start(struct r600_common_context *ctx,
+				     struct r600_query_hw *hwquery,
+				     struct r600_resource *buffer, uint64_t va)
+{
+	struct r600_perfcounters *pc = ctx->screen->perfcounters;
+	struct r600_query_pc *pcq = (struct r600_query_pc *)hwquery;
+	struct r600_pc_group *grp;
+	int last_se = -1;
+	int last_instance = -1;
+
+	if (pcq->shaders)
+		pc->emit_shaders(ctx, pcq->shaders);
+
+	for (grp = pcq->groups; grp; grp = grp->next) {
+		if (!(grp->se == last_se && grp->instance == last_instance)) {
+			last_se = grp->se;
+			last_instance = grp->instance;
+			pc->emit_instance(ctx, last_se, last_instance);
+		}
+
+		pc->emit_select(ctx, grp->block,
+				grp->num_counters, grp->selectors);
+	}
+
+	/* Undo any SE/instance targeting that was left behind by the loop. */
+	if (!(last_se == -1 && last_instance == -1))
+		pc->emit_instance(ctx, -1, -1);
+
+	pc->emit_start(ctx, buffer, va);
+}
+
+/*
+ * Emit the commands that stop the counters and copy their values to the
+ * result buffer.
+ *
+ * For every group, each covered (SE, instance) pair is targeted in turn and
+ * its counters are read to consecutive dwords starting at va; the write
+ * address advances by 4 bytes per counter after every read. A group with
+ * se < 0 covers all shader engines when its block is per-SE, and a group with
+ * instance < 0 covers all instances of the block.
+ */
+static void r600_pc_query_emit_stop(struct r600_common_context *ctx,
+				    struct r600_query_hw *hwquery,
+				    struct r600_resource *buffer, uint64_t va)
+{
+	struct r600_perfcounters *pc = ctx->screen->perfcounters;
+	struct r600_query_pc *pcq = (struct r600_query_pc *)hwquery;
+	struct r600_pc_group *grp;
+
+	pc->emit_stop(ctx, buffer, va);
+
+	for (grp = pcq->groups; grp; grp = grp->next) {
+		struct r600_perfcounter_block *block = grp->block;
+		const int all_se = grp->se < 0;
+		const int all_instances = grp->instance < 0;
+		unsigned se = all_se ? 0 : grp->se;
+		unsigned se_end = se + 1;
+
+		if (all_se && (block->flags & R600_PC_BLOCK_SE))
+			se_end = ctx->screen->info.max_se;
+
+		/* Both loops run at least once, as do/while loops. */
+		do {
+			unsigned instance = all_instances ? 0 : grp->instance;
+			unsigned instance_end = all_instances ?
+				block->num_instances : instance + 1;
+
+			do {
+				pc->emit_instance(ctx, se, instance);
+				pc->emit_read(ctx, block,
+					      grp->num_counters, grp->selectors,
+					      buffer, va);
+				va += 4 * grp->num_counters;
+			} while (++instance < instance_end);
+		} while (++se < se_end);
+	}
+
+	pc->emit_instance(ctx, -1, -1);
+}
+
+/*
+ * Zero the batch result entries of a query, one entry per requested counter.
+ */
+static void r600_pc_query_clear_result(struct r600_query_hw *hwquery,
+				       union pipe_query_result *result)
+{
+	struct r600_query_pc *pcq = (struct r600_query_pc *)hwquery;
+	size_t bytes = sizeof(result->batch[0]) * pcq->num_counters;
+
+	memset(result, 0, bytes);
+}
+
+/*
+ * Accumulate one result buffer into the batch result.
+ *
+ * For every requested counter, `dwords` 32-bit samples are read starting at
+ * dword `base` and spaced `stride` dwords apart, and their sum is added to
+ * result->batch[i].u32. Counters whose base is ~0 are skipped.
+ */
+static void r600_pc_query_add_result(struct r600_common_context *ctx,
+				     struct r600_query_hw *hwquery,
+				     void *buffer,
+				     union pipe_query_result *result)
+{
+	struct r600_query_pc *pcq = (struct r600_query_pc *)hwquery;
+	uint32_t *samples = buffer;
+	unsigned c;
+
+	for (c = 0; c < pcq->num_counters; c++) {
+		struct r600_pc_counter *counter = &pcq->counters[c];
+		unsigned offset;
+		unsigned n;
+
+		if (counter->base == ~0)
+			continue;
+
+		offset = counter->base;
+		for (n = 0; n < counter->dwords; n++) {
+			result->batch[c].u32 += samples[offset];
+			offset += counter->stride;
+		}
+	}
+}
+
+static struct r600_query_ops batch_query_ops = { /* generic query entry points installed on batch queries */
+ .destroy = r600_pc_query_destroy, /* frees the perfcounter state, then the hw query */
+ .begin = r600_query_hw_begin, /* begin/end/get_result reuse the generic hw-query code */
+ .end = r600_query_hw_end,
+ .get_result = r600_query_hw_get_result
+};
+
+static struct r600_query_hw_ops batch_query_hw_ops = { /* perfcounter-specific hooks installed on batch queries */
+ .emit_start = r600_pc_query_emit_start, /* select and start the counters */
+ .emit_stop = r600_pc_query_emit_stop, /* stop the counters and read them back */
+ .clear_result = r600_pc_query_clear_result, /* zero one result entry per counter */
+ .add_result = r600_pc_query_add_result, /* sum the samples of one result buffer */
+};
+
+/*
+ * Find or create the per-query state of one group of a block.
+ *
+ * \param screen   screen, used for the number of shader engines
+ * \param query    query that owns the group list
+ * \param block    block the group belongs to
+ * \param sub_gid  group index within the block
+ * \return the group state, or NULL on allocation failure or when the group's
+ *         shader type conflicts with a shader group already in the query
+ *
+ * The group index is decoded with the layout that
+ * r600_perfcounters_add_block generates: shader type outermost, then shader
+ * engine, then instance. The SE and instance dimensions only exist when the
+ * corresponding *_GROUPS flag is set. A new group is pushed on the front of
+ * query->groups, and query->shaders is updated for shader blocks and
+ * shader-windowed blocks.
+ */
+static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
+					     struct r600_query_pc *query,
+					     struct r600_perfcounter_block *block,
+					     unsigned sub_gid)
+{
+	struct r600_pc_group *group = query->groups;
+	/* Groups exposed per (shader, SE) pair. Instances are only
+	 * enumerated as separate groups with INSTANCE_GROUPS; otherwise
+	 * they are summed into a single group. */
+	unsigned groups_instance = 1;
+
+	while (group) {
+		if (group->block == block && group->sub_gid == sub_gid)
+			return group;
+		group = group->next;
+	}
+
+	group = CALLOC_STRUCT(r600_pc_group);
+	if (!group)
+		return NULL;
+
+	group->block = block;
+	group->sub_gid = sub_gid;
+
+	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
+		groups_instance = block->num_instances;
+
+	if (block->flags & R600_PC_BLOCK_SHADER) {
+		unsigned sub_gids = groups_instance;
+		unsigned shader_id;
+		unsigned shader_mask;
+		unsigned query_shader_mask;
+
+		if (block->flags & R600_PC_BLOCK_SE_GROUPS)
+			sub_gids = sub_gids * screen->info.max_se;
+		shader_id = sub_gid / sub_gids;
+		sub_gid = sub_gid % sub_gids;
+
+		/* Shader group 0 counts all shader types, group i > 0 a
+		 * single type (see r600_pc_shader_suffix). */
+		if (shader_id == 0)
+			shader_mask = R600_PC_SHADER_ALL;
+		else
+			shader_mask = 1 << (shader_id - 1);
+
+		/* One shader mask applies to the whole query, so all
+		 * shader groups in it must agree on it. */
+		query_shader_mask = query->shaders & R600_PC_SHADER_ALL;
+		if (query_shader_mask && query_shader_mask != shader_mask) {
+			fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
+			FREE(group);
+			return NULL;
+		}
+		query->shaders |= shader_mask;
+	}
+
+	if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED) {
+		/* A non-zero value in query->shaders ensures that the shader
+		 * masking is reset unless the user explicitly requests one. */
+		query->shaders |= R600_PC_SHADER_WINDOWING;
+	}
+
+	if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
+		group->se = sub_gid / groups_instance;
+		sub_gid = sub_gid % groups_instance;
+	} else {
+		group->se = -1;
+	}
+
+	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
+		group->instance = sub_gid;
+	} else {
+		group->instance = -1;
+	}
+
+	group->next = query->groups;
+	query->groups = group;
+
+	return group;
+}
+
+/*
+ * Create a batch query that samples a set of performance counters together.
+ *
+ * \param ctx          context the query is created for
+ * \param num_queries  number of entries in query_types
+ * \param query_types  counter ids, each R600_QUERY_FIRST_PERFCOUNTER plus a
+ *                     flat counter index
+ * \return the new query, or NULL when perfcounters are unavailable, a
+ *         counter id is invalid, a group has more counters selected than its
+ *         block supports, shader groups conflict, or an allocation fails
+ *
+ * The function works in three passes: it collects the requested selectors per
+ * group, lays out the result buffer and the command-stream size per group,
+ * and finally maps every requested counter to its dwords in the result.
+ */
+struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
+					   unsigned num_queries,
+					   unsigned *query_types)
+{
+	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+	struct r600_common_screen *screen = rctx->screen;
+	struct r600_perfcounters *pc = screen->perfcounters;
+	struct r600_perfcounter_block *block;
+	struct r600_pc_group *group;
+	struct r600_query_pc *query;
+	unsigned base_gid, sub_gid, sub_index;
+	unsigned i, j;
+
+	if (!pc)
+		return NULL;
+
+	query = CALLOC_STRUCT(r600_query_pc);
+	if (!query)
+		return NULL;
+
+	query->b.b.ops = &batch_query_ops;
+	query->b.ops = &batch_query_hw_ops;
+	query->b.flags = R600_QUERY_HW_FLAG_TIMER;
+
+	query->num_counters = num_queries;
+
+	/* Collect selectors per group */
+	for (i = 0; i < num_queries; ++i) {
+		if (query_types[i] < R600_QUERY_FIRST_PERFCOUNTER)
+			goto error;
+
+		block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
+				       &base_gid, &sub_index);
+		if (!block)
+			goto error;
+
+		sub_gid = sub_index / block->num_selectors;
+		sub_index = sub_index % block->num_selectors;
+
+		group = get_group_state(screen, query, block, sub_gid);
+		if (!group)
+			goto error;
+
+		/* This check also bounds the writes to group->selectors. */
+		if (group->num_counters >= block->num_counters) {
+			fprintf(stderr,
+				"perfcounter group %s: too many selected\n",
+				block->basename);
+			goto error;
+		}
+		group->selectors[group->num_counters] = sub_index;
+		++group->num_counters;
+	}
+
+	/* Compute result bases and CS size per group */
+	query->b.num_cs_dw_begin = pc->num_start_cs_dwords;
+	query->b.num_cs_dw_end = pc->num_stop_cs_dwords;
+
+	query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
+	query->b.num_cs_dw_end += pc->num_instance_cs_dwords;
+
+	i = 0;
+	for (group = query->groups; group; group = group->next) {
+		unsigned select_dw, read_dw;
+		unsigned instances = 1;
+
+		block = group->block;
+
+		/* Number of (SE, instance) pairs that are read for this
+		 * group; must match the loops in emit_stop. */
+		if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
+			instances = screen->info.max_se;
+		if (group->instance < 0)
+			instances *= block->num_instances;
+
+		group->result_base = i;
+		query->b.result_size += 4 * instances * group->num_counters;
+		i += instances * group->num_counters;
+
+		pc->get_size(block, group->num_counters, group->selectors,
+			     &select_dw, &read_dw);
+		query->b.num_cs_dw_begin += select_dw;
+		query->b.num_cs_dw_end += instances * read_dw;
+		query->b.num_cs_dw_begin += pc->num_instance_cs_dwords; /* conservative */
+		query->b.num_cs_dw_end += instances * pc->num_instance_cs_dwords;
+	}
+
+	if (query->shaders) {
+		/* Only the windowing flag is set: count all shader types. */
+		if ((query->shaders & R600_PC_SHADER_ALL) == 0)
+			query->shaders |= R600_PC_SHADER_ALL;
+		query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
+	}
+
+	/* Map user-supplied query array to result indices */
+	query->counters = CALLOC(num_queries, sizeof(*query->counters));
+	/* A zero-sized allocation may legitimately yield NULL; only a failed
+	 * non-empty allocation is an error. */
+	if (num_queries && !query->counters)
+		goto error;
+
+	for (i = 0; i < num_queries; ++i) {
+		struct r600_pc_counter *counter = &query->counters[i];
+
+		/* Cannot fail: the same id was validated in the first pass. */
+		block = lookup_counter(pc, query_types[i] - R600_QUERY_FIRST_PERFCOUNTER,
+				       &base_gid, &sub_index);
+
+		sub_gid = sub_index / block->num_selectors;
+		sub_index = sub_index % block->num_selectors;
+
+		/* Finds the group that was created in the first pass. */
+		group = get_group_state(screen, query, block, sub_gid);
+		assert(group != NULL);
+
+		for (j = 0; j < group->num_counters; ++j) {
+			if (group->selectors[j] == sub_index)
+				break;
+		}
+
+		counter->base = group->result_base + j;
+		counter->stride = group->num_counters;
+
+		counter->dwords = 1;
+		if ((block->flags & R600_PC_BLOCK_SE) && group->se < 0)
+			counter->dwords = screen->info.max_se;
+		if (group->instance < 0)
+			counter->dwords *= block->num_instances;
+	}
+
+	if (!r600_query_hw_init(rctx, &query->b))
+		goto error;
+
+	return (struct pipe_query *)query;
+
+error:
+	r600_pc_query_destroy(rctx, &query->b.b);
+	return NULL;
+}
+
+/*
+ * Describe one performance counter, or count them.
+ *
+ * \param screen  screen whose perfcounters are queried
+ * \param index   flat counter index (ignored when info is NULL)
+ * \param info    filled with the counter description; when NULL the total
+ *                number of counters is returned instead
+ * \return the number of counters when info is NULL; otherwise 1 on success
+ *         and 0 when index is out of range. Always 0 without perfcounters.
+ */
+int r600_get_perfcounter_info(struct r600_common_screen *screen,
+			      unsigned index,
+			      struct pipe_driver_query_info *info)
+{
+	struct r600_perfcounters *pc = screen->perfcounters;
+	struct r600_perfcounter_block *block;
+	unsigned first_gid, rel;
+
+	if (!pc)
+		return 0;
+
+	if (!info) {
+		unsigned total = 0;
+		unsigned i;
+
+		for (i = 0; i < pc->num_blocks; i++) {
+			const struct r600_perfcounter_block *b = &pc->blocks[i];
+
+			total += b->num_selectors * b->num_groups;
+		}
+
+		return total;
+	}
+
+	block = lookup_counter(pc, index, &first_gid, &rel);
+	if (!block)
+		return 0;
+
+	info->name = block->selector_names + rel * block->selector_name_stride;
+	info->query_type = R600_QUERY_FIRST_PERFCOUNTER + index;
+	info->group_id = first_gid + rel / block->num_selectors;
+	info->type = PIPE_DRIVER_QUERY_TYPE_UINT;
+	info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;
+	info->max_value.u64 = 0;
+	info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;
+	return 1;
+}
+
+/*
+ * Describe one performance counter group, or count them.
+ *
+ * \param screen  screen whose perfcounters are queried
+ * \param index   flat group index (ignored when info is NULL)
+ * \param info    filled with the group description; when NULL the total
+ *                number of groups is returned instead
+ * \return the number of groups when info is NULL; otherwise 1 on success and
+ *         0 when index is out of range. Always 0 without perfcounters.
+ */
+int r600_get_perfcounter_group_info(struct r600_common_screen *screen,
+				    unsigned index,
+				    struct pipe_driver_query_group_info *info)
+{
+	struct r600_perfcounters *pc = screen->perfcounters;
+	struct r600_perfcounter_block *owner;
+
+	if (!pc)
+		return 0;
+
+	if (!info)
+		return pc->num_groups;
+
+	/* lookup_group rewrites index to be relative to the owning block. */
+	owner = lookup_group(pc, &index);
+	if (!owner)
+		return 0;
+
+	info->name = owner->group_names + index * owner->group_name_stride;
+	info->max_active_queries = owner->num_counters;
+	info->num_queries = owner->num_selectors;
+	return 1;
+}
+
+/*
+ * Tear down the screen's perfcounters through their cleanup hook; does
+ * nothing when the screen has none.
+ */
+void r600_perfcounters_destroy(struct r600_common_screen *rscreen)
+{
+	struct r600_perfcounters *pc = rscreen->perfcounters;
+
+	if (!pc)
+		return;
+
+	pc->cleanup(rscreen);
+}
+
+/*
+ * Allocate zeroed storage for num_blocks block descriptions and read the
+ * RADEON_PC_SEPARATE_SE / RADEON_PC_SEPARATE_INSTANCE debug options.
+ *
+ * \return TRUE on success, FALSE when the block array cannot be allocated
+ *         (the options are not read in that case)
+ */
+boolean r600_perfcounters_init(struct r600_perfcounters *pc,
+			       unsigned num_blocks)
+{
+	struct r600_perfcounter_block *storage;
+
+	storage = CALLOC(num_blocks, sizeof(*storage));
+	pc->blocks = storage;
+	if (!storage)
+		return FALSE;
+
+	pc->separate_se = debug_get_bool_option("RADEON_PC_SEPARATE_SE", FALSE);
+	pc->separate_instance = debug_get_bool_option("RADEON_PC_SEPARATE_INSTANCE", FALSE);
+
+	return TRUE;
+}
+
+/*
+ * Append a hardware block description to pc and generate its group and
+ * selector names.
+ *
+ * \param rscreen    screen, used for the number of shader engines
+ * \param pc         perfcounters to extend; the slot blocks[num_blocks] is
+ *                   filled, and num_blocks/num_groups grow only on success
+ * \param name       base name of the block; the pointer is stored as-is
+ * \param flags      R600_PC_BLOCK_* bits
+ * \param counters   counters that can be active at once per group
+ * \param selectors  number of selectable events
+ * \param instances  number of block instances (0 is treated as 1)
+ * \param data       opaque pointer stored in the block
+ * \return TRUE on success, FALSE when a name table cannot be allocated
+ *
+ * Groups are laid out shader type first, then shader engine, then instance.
+ * Group names are "<name>[<shader suffix>][<se>][_][<instance>]" and selector
+ * names are "<group name>_<3-digit selector>"; both tables use fixed-size
+ * slots (group_name_stride / selector_name_stride).
+ */
+boolean r600_perfcounters_add_block(struct r600_common_screen *rscreen,
+				    struct r600_perfcounters *pc,
+				    const char *name, unsigned flags,
+				    unsigned counters, unsigned selectors,
+				    unsigned instances, void *data)
+{
+	struct r600_perfcounter_block *block = &pc->blocks[pc->num_blocks];
+	unsigned n_shader = 1, n_se = 1, n_instance = 1;
+	unsigned shader, se, inst, gid, sel;
+	unsigned base_len, stride;
+	char *gname;
+	char *cursor;
+
+	assert(counters <= R600_QUERY_MAX_COUNTERS);
+
+	block->basename = name;
+	block->flags = flags;
+	block->num_counters = counters;
+	block->num_selectors = selectors;
+	block->num_instances = MAX2(instances, 1);
+	block->data = data;
+
+	/* Apply the user's request for finer-grained groups. */
+	if (pc->separate_se && (block->flags & R600_PC_BLOCK_SE))
+		block->flags |= R600_PC_BLOCK_SE_GROUPS;
+	if (pc->separate_instance && block->num_instances > 1)
+		block->flags |= R600_PC_BLOCK_INSTANCE_GROUPS;
+
+	/* Number of groups along each dimension. */
+	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
+		n_instance = block->num_instances;
+	if (block->flags & R600_PC_BLOCK_SE_GROUPS)
+		n_se = rscreen->info.max_se;
+	if (block->flags & R600_PC_BLOCK_SHADER)
+		n_shader = ARRAY_SIZE(r600_pc_shader_suffix);
+	block->num_groups = n_instance * n_se * n_shader;
+
+	/* Slot size of a group name: base name plus terminator, plus the
+	 * widest optional suffixes. */
+	base_len = strlen(name);
+	stride = base_len + 1;
+	if (block->flags & R600_PC_BLOCK_SHADER)
+		stride += 3;
+	if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
+		assert(n_se <= 10);
+		stride += 1;
+
+		/* '_' separator between SE and instance number */
+		if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
+			stride += 1;
+	}
+	if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS) {
+		assert(n_instance <= 100);
+		stride += 2;
+	}
+	block->group_name_stride = stride;
+
+	block->group_names = MALLOC(block->num_groups * stride);
+	if (!block->group_names)
+		return FALSE;
+
+	gname = block->group_names;
+	for (shader = 0; shader < n_shader; ++shader) {
+		const char *suffix = r600_pc_shader_suffix[shader];
+		unsigned suffix_len = strlen(suffix);
+
+		for (se = 0; se < n_se; ++se) {
+			for (inst = 0; inst < n_instance; ++inst) {
+				strcpy(gname, name);
+				cursor = gname + base_len;
+
+				if (block->flags & R600_PC_BLOCK_SHADER) {
+					strcpy(cursor, suffix);
+					cursor += suffix_len;
+				}
+
+				if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
+					cursor += sprintf(cursor, "%d", se);
+					if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
+						*cursor++ = '_';
+				}
+
+				if (block->flags & R600_PC_BLOCK_INSTANCE_GROUPS)
+					cursor += sprintf(cursor, "%d", inst);
+
+				gname += stride;
+			}
+		}
+	}
+
+	/* Selector names append "_NNN" to the group name. */
+	assert(selectors <= 1000);
+	block->selector_name_stride = stride + 4;
+	block->selector_names = MALLOC(block->num_groups * selectors *
+				       block->selector_name_stride);
+	if (!block->selector_names) {
+		FREE(block->group_names);
+		return FALSE;
+	}
+
+	gname = block->group_names;
+	cursor = block->selector_names;
+	for (gid = 0; gid < block->num_groups; ++gid) {
+		for (sel = 0; sel < selectors; ++sel) {
+			sprintf(cursor, "%s_%03d", gname, sel);
+			cursor += block->selector_name_stride;
+		}
+		gname += stride;
+	}
+
+	/* Publish the block only once it is fully initialized. */
+	++pc->num_blocks;
+	pc->num_groups += block->num_groups;
+
+	return TRUE;
+}
+
+/*
+ * Free a perfcounters object: the name tables of every added block, the
+ * block array and the object itself. pc must not be used afterwards.
+ *
+ * A NULL pc is accepted and ignored, like FREE(), so that error paths can
+ * call this unconditionally.
+ */
+void r600_perfcounters_do_destroy(struct r600_perfcounters *pc)
+{
+	unsigned i;
+
+	if (!pc)
+		return;
+
+	for (i = 0; i < pc->num_blocks; ++i) {
+		FREE(pc->blocks[i].group_names);
+		FREE(pc->blocks[i].selector_names);
+	}
+	FREE(pc->blocks);
+	FREE(pc);
+}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 7464f677398..f03dcd96e85 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -977,6 +977,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
void r600_destroy_common_screen(struct r600_common_screen *rscreen)
{
+ r600_perfcounters_destroy(rscreen);
r600_gpu_load_kill_thread(rscreen);
pipe_mutex_destroy(rscreen->gpu_load_mutex);
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index fbdc5c410ae..253d6577680 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -90,6 +90,7 @@
#define R600_MAP_BUFFER_ALIGNMENT 64
struct r600_common_context;
+struct r600_perfcounters;
struct radeon_shader_reloc {
char *name;
@@ -300,6 +301,9 @@ struct r600_common_screen {
volatile unsigned gpu_load_stop_thread; /* bool */
char renderer_string[64];
+
+ /* Performance counters. */
+ struct r600_perfcounters *perfcounters;
};
/* This encapsulates a state or an operation which can emitted into the GPU
@@ -508,6 +512,9 @@ void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen);
uint64_t r600_gpu_load_begin(struct r600_common_screen *rscreen);
unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin);
+/* r600_perfcounter.c */
+void r600_perfcounters_destroy(struct r600_common_screen *rscreen);
+
/* r600_query.c */
void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
void r600_query_init(struct r600_common_context *rctx);
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 38bbbbf8a5e..09eabab0e7d 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -1141,11 +1141,15 @@ static int r600_get_driver_query_info(struct pipe_screen *screen,
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
unsigned num_queries = r600_get_num_queries(rscreen);
- if (!info)
- return num_queries;
+ if (!info) {
+ unsigned num_perfcounters =
+ r600_get_perfcounter_info(rscreen, 0, NULL);
+
+ return num_queries + num_perfcounters;
+ }
if (index >= num_queries)
- return 0;
+ return r600_get_perfcounter_info(rscreen, index - num_queries, info);
*info = r600_driver_query_list[index];
@@ -1166,9 +1170,19 @@ static int r600_get_driver_query_info(struct pipe_screen *screen,
return 1;
}
+/*
+ * pipe_screen::get_driver_query_group_info hook: forwards the request to the
+ * perfcounter code unchanged.
+ */
+static int r600_get_driver_query_group_info(struct pipe_screen *screen,
+					    unsigned index,
+					    struct pipe_driver_query_group_info *info)
+{
+	return r600_get_perfcounter_group_info((struct r600_common_screen *)screen,
+					       index, info);
+}
+
void r600_query_init(struct r600_common_context *rctx)
{
rctx->b.create_query = r600_create_query;
+ rctx->b.create_batch_query = r600_create_batch_query;
rctx->b.destroy_query = r600_destroy_query;
rctx->b.begin_query = r600_begin_query;
rctx->b.end_query = r600_end_query;
@@ -1185,4 +1199,5 @@ void r600_query_init(struct r600_common_context *rctx)
void r600_init_screen_query_functions(struct r600_common_screen *rscreen)
{
rscreen->b.get_driver_query_info = r600_get_driver_query_info;
+ rscreen->b.get_driver_query_group_info = r600_get_driver_query_group_info;
}
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index 0ea5707ca45..64ac916dbb6 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -31,7 +31,11 @@
#include "pipe/p_defines.h"
#include "util/list.h"
+struct pipe_context;
+struct pipe_query;
+
struct r600_common_context;
+struct r600_common_screen;
struct r600_query;
struct r600_query_hw;
struct r600_resource;
@@ -133,4 +137,121 @@ boolean r600_query_hw_get_result(struct r600_common_context *rctx,
boolean wait,
union pipe_query_result *result);
+/* Performance counters */
+enum { /* bits of r600_perfcounter_block::flags */
+ /* This block is part of the shader engine; groups that are not per-SE
+ * read and sum it once per shader engine. */
+ R600_PC_BLOCK_SE = (1 << 0),
+
+ /* Expose per-instance groups instead of summing all instances (within
+ * an SE). Also set by r600_perfcounters_add_block for blocks with more
+ * than one instance when RADEON_PC_SEPARATE_INSTANCE is enabled. */
+ R600_PC_BLOCK_INSTANCE_GROUPS = (1 << 1),
+
+ /* Expose per-SE groups instead of summing instances across SEs. Also
+ * set by r600_perfcounters_add_block for R600_PC_BLOCK_SE blocks when
+ * RADEON_PC_SEPARATE_SE is enabled. */
+ R600_PC_BLOCK_SE_GROUPS = (1 << 2),
+
+ /* Shader block: its groups are additionally split per shader type. */
+ R600_PC_BLOCK_SHADER = (1 << 3),
+
+ /* Non-shader block with perfcounters windowed by shaders. */
+ R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4),
+};
+
+/* Shader enable bits. Chosen to coincide with SQ_PERFCOUNTER_CTRL values.
+ * A mask of these is what r600_perfcounters::emit_shaders receives. */
+enum {
+ R600_PC_SHADER_PS = (1 << 0),
+ R600_PC_SHADER_VS = (1 << 1),
+ R600_PC_SHADER_GS = (1 << 2),
+ R600_PC_SHADER_ES = (1 << 3),
+ R600_PC_SHADER_HS = (1 << 4),
+ R600_PC_SHADER_LS = (1 << 5),
+ R600_PC_SHADER_CS = (1 << 6),
+
+ R600_PC_SHADER_ALL = R600_PC_SHADER_PS | R600_PC_SHADER_VS |
+ R600_PC_SHADER_GS | R600_PC_SHADER_ES |
+ R600_PC_SHADER_HS | R600_PC_SHADER_LS |
+ R600_PC_SHADER_CS, /* union of all seven shader type bits */
+
+ R600_PC_SHADER_WINDOWING = (1 << 31), /* marker outside R600_PC_SHADER_ALL; NOTE(review): 1 << 31 overflows a 32-bit signed int - consider 1u << 31 */
+};
+
+/* Describes a hardware block with performance counters. Multiple instances of
+ * each block, possibly per-SE, may exist on the chip. Depending on the block
+ * and on the user's configuration, we either
+ * (a) expose every instance as a performance counter group,
+ * (b) expose a single performance counter group that reports the sum over all
+ * instances, or
+ * (c) expose one performance counter group per instance, but summed over all
+ * shader engines.
+ */
+struct r600_perfcounter_block {
+ const char *basename; /* name given to r600_perfcounters_add_block; the pointer is stored, not copied */
+ unsigned flags; /* R600_PC_BLOCK_* bits */
+ unsigned num_counters; /* counters that can be selected at once in one group */
+ unsigned num_selectors; /* number of selectable events per group */
+ unsigned num_instances; /* number of block instances, at least 1 */
+
+ unsigned num_groups; /* number of groups exposed for this block */
+ char *group_names; /* num_groups NUL-terminated names in slots of group_name_stride bytes */
+ unsigned group_name_stride; /* slot size, in bytes, of one group name */
+
+ char *selector_names; /* num_groups * num_selectors names in slots of selector_name_stride bytes */
+ unsigned selector_name_stride; /* slot size, in bytes, of one selector name */
+
+ void *data; /* opaque pointer given to r600_perfcounters_add_block; not interpreted by the common code */
+};
+
+struct r600_perfcounters { /* per-screen perfcounter description plus the hooks that emit the hardware commands */
+ unsigned num_groups; /* total number of groups over all added blocks */
+ unsigned num_blocks; /* number of blocks added so far */
+ struct r600_perfcounter_block *blocks; /* block array allocated by r600_perfcounters_init */
+
+ unsigned num_start_cs_dwords; /* CS dwords budgeted for emit_start */
+ unsigned num_stop_cs_dwords; /* CS dwords budgeted for emit_stop */
+ unsigned num_instance_cs_dwords; /* CS dwords budgeted for one emit_instance */
+ unsigned num_shaders_cs_dwords; /* CS dwords budgeted for emit_shaders */
+
+ void (*get_size)(struct r600_perfcounter_block *,
+ unsigned count, unsigned *selectors,
+ unsigned *num_select_dw, unsigned *num_read_dw); /* reports the CS dwords needed to select and to read the given counters */
+
+ void (*emit_instance)(struct r600_common_context *,
+ int se, int instance); /* target one SE/instance; the common code passes -1 for groups that are not per-SE / per-instance and (-1, -1) to reset */
+ void (*emit_shaders)(struct r600_common_context *, unsigned shaders); /* program the R600_PC_SHADER_* mask */
+ void (*emit_select)(struct r600_common_context *,
+ struct r600_perfcounter_block *,
+ unsigned count, unsigned *selectors); /* program the selectors of one block */
+ void (*emit_start)(struct r600_common_context *,
+ struct r600_resource *buffer, uint64_t va); /* called last when a query begins */
+ void (*emit_stop)(struct r600_common_context *,
+ struct r600_resource *buffer, uint64_t va); /* called first when a query ends */
+ void (*emit_read)(struct r600_common_context *,
+ struct r600_perfcounter_block *,
+ unsigned count, unsigned *selectors,
+ struct r600_resource *buffer, uint64_t va); /* write count counter values of the targeted instance, one dword each, at va */
+
+ void (*cleanup)(struct r600_common_screen *); /* invoked by r600_perfcounters_destroy */
+
+ boolean separate_se; /* value of the RADEON_PC_SEPARATE_SE debug option */
+ boolean separate_instance; /* value of the RADEON_PC_SEPARATE_INSTANCE debug option */
+};
+
+struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
+ unsigned num_queries,
+ unsigned *query_types);
+
+int r600_get_perfcounter_info(struct r600_common_screen *,
+ unsigned index,
+ struct pipe_driver_query_info *info);
+int r600_get_perfcounter_group_info(struct r600_common_screen *,
+ unsigned index,
+ struct pipe_driver_query_group_info *info);
+
+boolean r600_perfcounters_init(struct r600_perfcounters *, unsigned num_blocks);
+boolean r600_perfcounters_add_block(struct r600_common_screen *,
+ struct r600_perfcounters *,
+ const char *name, unsigned flags,
+ unsigned counters, unsigned selectors,
+ unsigned instances, void *data);
+void r600_perfcounters_do_destroy(struct r600_perfcounters *);
+
#endif /* R600_QUERY_H */