From 0e7464f0a93908cc31d05fb33611f3cd73a7be65 Mon Sep 17 00:00:00 2001
From: Robert Bragg <robert@sixbynine.org>
Date: Wed, 29 Apr 2015 08:41:34 +0100
Subject: mesa: Model INTEL perf query backend after query obj BE

Instead of using the same backend interface as AMD_performance_monitor
this defines a dedicated INTEL_performance_query interface that is
modelled more on the ARB_query_buffer_object interface (considering the
similarity of the extensions) with the addition of vfuncs for
initializing and enumerating query and counter info.

Compared to the previous backend, some notable differences are:

- The backend is free to represent counters using whatever data
  structures are optimal/convenient since queries and counters are
  enumerated via an iterator api instead of declaring them using
  structures directly shared with the frontend.

  This is also done to help us support the full range of data and
  semantic types available with INTEL_performance_query which is awkward
  while using a structure shared with the AMD_performance_monitor
  backend since neither extension's types are a subset of the other.

- The backend must support waiting for a query instead of the frontend
  simply using glFinish().

- Objects go through 'Active' and 'Ready' states consistent with the
  query object backend (hopefully making them more familiar). There is
  no 'Ended' state (which used to show that a query has ended at least
  once for a given object). There is a new 'Used' state, set when a
  query is first begun which implies that we are expecting to get
  results back for the object at some point. There's no equivalent to
  the 'EverBound' state since the spec doesn't require there to be a
  limbo state between generating IDs and associating them with an object
  on query Begin.

The INTEL_performance_query and AMD_performance_monitor extensions are
now completely orthogonal within Mesa main (though a driver could
optionally choose to implement both extensions within a unified backend
if that were convenient for the sake of sharing state/code).

v2: (Samuel Pitoiset)
- init PerfQuery.NumQueries in frontend
- s/return_string/output_clipped_string/
- s/backed/backend/ typo
- remove redundant *bytesWritten = 0
v3:
- Add InitPerfQueryInfo for lazy probing of available queries
v4:
- Clean up some internal usage of GL typedefs (Ken)

Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
---
 src/mesa/main/dd.h                |  41 +++
 src/mesa/main/mtypes.h            |  24 +-
 src/mesa/main/performance_query.c | 632 +++++++++++++++-----------------------
 src/mesa/main/performance_query.h |   6 +-
 4 files changed, 301 insertions(+), 402 deletions(-)

(limited to 'src/mesa/main')

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 7ebd084ca31..aba301cf22e 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -779,6 +779,47 @@ struct dd_function_table {
                                 GLint *bytesWritten);
    /*@}*/
 
+   /**
+    * \name Performance Query objects
+    */
+   /*@{*/
+   unsigned (*InitPerfQueryInfo)(struct gl_context *ctx);
+   void (*GetPerfQueryInfo)(struct gl_context *ctx,
+                            unsigned queryIndex,
+                            const char **name,
+                            GLuint *dataSize,
+                            GLuint *numCounters,
+                            GLuint *numActive);
+   void (*GetPerfCounterInfo)(struct gl_context *ctx,
+                              unsigned queryIndex,
+                              unsigned counterIndex,
+                              const char **name,
+                              const char **desc,
+                              GLuint *offset,
+                              GLuint *data_size,
+                              GLuint *type_enum,
+                              GLuint *data_type_enum,
+                              GLuint64 *raw_max);
+   struct gl_perf_query_object * (*NewPerfQueryObject)(struct gl_context *ctx,
+                                                       unsigned queryIndex);
+   void (*DeletePerfQuery)(struct gl_context *ctx,
+                           struct gl_perf_query_object *obj);
+   GLboolean (*BeginPerfQuery)(struct gl_context *ctx,
+                               struct gl_perf_query_object *obj);
+   void (*EndPerfQuery)(struct gl_context *ctx,
+                        struct gl_perf_query_object *obj);
+   void (*WaitPerfQuery)(struct gl_context *ctx,
+                         struct gl_perf_query_object *obj);
+   GLboolean (*IsPerfQueryReady)(struct gl_context *ctx,
+                                 struct gl_perf_query_object *obj);
+   void (*GetPerfQueryData)(struct gl_context *ctx,
+                            struct gl_perf_query_object *obj,
+                            GLsizei dataSize,
+                            GLuint *data,
+                            GLuint *bytesWritten);
+   /*@}*/
+
+
    /**
     * \name GREMEDY debug/marker functions
     */
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 9f4f9613943..d07391debbb 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1859,6 +1859,23 @@ struct gl_perf_monitor_group
 };
 
 
+/**
+ * A query object instance as described in INTEL_performance_query.
+ *
+ * NB: We want to keep this and the corresponding backend structure
+ * relatively lean considering that applications may expect to
+ * allocate enough objects to be able to query around all draw calls
+ * in a frame.
+ */
+struct gl_perf_query_object
+{
+   GLuint Id;          /**< hash table ID/name */
+   unsigned Used:1;    /**< has been used for 1 or more queries */
+   unsigned Active:1;  /**< inside Begin/EndPerfQuery */
+   unsigned Ready:1;   /**< result is ready? */
+};
+
+
 /**
  * Context state for AMD_performance_monitor.
  */
@@ -1878,12 +1895,7 @@ struct gl_perf_monitor_state
  */
 struct gl_perf_query_state
 {
-   /** Array of performance monitor groups (indexed by group ID) */
-   const struct gl_perf_monitor_group *Groups;
-   GLuint NumGroups;
-
-   /** The table of all performance query objects. */
-   struct _mesa_HashTable *Objects;
+   struct _mesa_HashTable *Objects; /**< The table of all performance query objects */
 };
 
 
diff --git a/src/mesa/main/performance_query.c b/src/mesa/main/performance_query.c
index f0ecabe3b58..aa103516a58 100644
--- a/src/mesa/main/performance_query.c
+++ b/src/mesa/main/performance_query.c
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2012 Intel Corporation
+ * Copyright © 2012-2017 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -24,14 +24,6 @@
 /**
  * \file performance_query.c
  * Core Mesa support for the INTEL_performance_query extension.
- *
- * In order to implement this extension, start by defining two enums:
- * one for Groups, and one for Counters.  These will be used as indexes into
- * arrays, so they should start at 0 and increment from there.
- *
- * Counter IDs need to be globally unique.  That is, you can't have counter 7
- * in group A and counter 7 in group B.  A global enum of all available
- * counters is a convenient way to guarantee this.
  */
 
 #include <stdbool.h>
@@ -42,73 +34,21 @@
 #include "macros.h"
 #include "mtypes.h"
 #include "performance_query.h"
-#include "util/bitset.h"
 #include "util/ralloc.h"
 
 void
 _mesa_init_performance_queries(struct gl_context *ctx)
 {
    ctx->PerfQuery.Objects = _mesa_NewHashTable();
-   ctx->PerfQuery.NumGroups = 0;
-   ctx->PerfQuery.Groups = NULL;
-}
-
-static inline void
-init_groups(struct gl_context *ctx)
-{
-   if (unlikely(!ctx->PerfMonitor.Groups))
-      ctx->Driver.InitPerfMonitorGroups(ctx);
-}
-
-static struct gl_perf_monitor_object *
-new_performance_query(struct gl_context *ctx, GLuint index)
-{
-   unsigned i;
-   struct gl_perf_monitor_object *m = ctx->Driver.NewPerfMonitor(ctx);
-
-   if (m == NULL)
-      return NULL;
-
-   m->Name = index;
-
-   m->Active = false;
-
-   m->ActiveGroups =
-      rzalloc_array(NULL, unsigned, ctx->PerfQuery.NumGroups);
-
-   m->ActiveCounters =
-      ralloc_array(NULL, BITSET_WORD *, ctx->PerfQuery.NumGroups);
-
-   if (m->ActiveGroups == NULL || m->ActiveCounters == NULL)
-      goto fail;
-
-   for (i = 0; i < ctx->PerfQuery.NumGroups; i++) {
-      const struct gl_perf_monitor_group *g = &ctx->PerfQuery.Groups[i];
-
-      m->ActiveCounters[i] = rzalloc_array(m->ActiveCounters, BITSET_WORD,
-                                           BITSET_WORDS(g->NumCounters));
-      if (m->ActiveCounters[i] == NULL)
-         goto fail;
-   }
-
-   return m;
-
-fail:
-   ralloc_free(m->ActiveGroups);
-   ralloc_free(m->ActiveCounters);
-   ctx->Driver.DeletePerfMonitor(ctx, m);
-   return NULL;
 }
 
 static void
 free_performance_query(GLuint key, void *data, void *user)
 {
-   struct gl_perf_monitor_object *m = data;
+   struct gl_perf_query_object *m = data;
    struct gl_context *ctx = user;
 
-   ralloc_free(m->ActiveGroups);
-   ralloc_free(m->ActiveCounters);
-   ctx->Driver.DeletePerfMonitor(ctx, m);
+   ctx->Driver.DeletePerfQuery(ctx, m);
 }
 
 void
@@ -119,50 +59,39 @@ _mesa_free_performance_queries(struct gl_context *ctx)
    _mesa_DeleteHashTable(ctx->PerfQuery.Objects);
 }
 
-static inline struct gl_perf_monitor_object *
-lookup_query(struct gl_context *ctx, GLuint id)
+static inline struct gl_perf_query_object *
+lookup_object(struct gl_context *ctx, GLuint id)
 {
-   return (struct gl_perf_monitor_object *)
-      _mesa_HashLookup(ctx->PerfQuery.Objects, id);
+   return _mesa_HashLookup(ctx->PerfQuery.Objects, id);
 }
 
-static inline const struct gl_perf_monitor_group *
-get_group(const struct gl_context *ctx, GLuint id)
+static GLuint
+init_performance_query_info(struct gl_context *ctx)
 {
-   if (id >= ctx->PerfQuery.NumGroups)
-      return NULL;
-
-   return &ctx->PerfQuery.Groups[id];
-}
-
-static inline const struct gl_perf_monitor_counter *
-get_counter(const struct gl_perf_monitor_group *group_obj, GLuint id)
-{
-   if (id >= group_obj->NumCounters)
-      return NULL;
-
-   return &group_obj->Counters[id];
+   if (ctx->Driver.InitPerfQueryInfo)
+      return ctx->Driver.InitPerfQueryInfo(ctx);
+   else
+      return 0;
 }
 
-/* For INTEL_performance_query, query id 0 is reserved to be invalid. We use
- * index to Groups array + 1 as the query id. Same applies to counter id.
- */
-static inline GLuint
+/* For INTEL_performance_query, query id 0 is reserved to be invalid. */
+static inline unsigned
 queryid_to_index(GLuint queryid)
 {
    return queryid - 1;
 }
 
 static inline GLuint
-index_to_queryid(GLuint index)
+index_to_queryid(unsigned index)
 {
    return index + 1;
 }
 
 static inline bool
-queryid_valid(const struct gl_context *ctx, GLuint queryid)
+queryid_valid(const struct gl_context *ctx, unsigned numQueries, GLuint queryid)
 {
-   return get_group(ctx, queryid_to_index(queryid)) != NULL;
+   GLuint index = queryid_to_index(queryid);
+   return index >= 0 && index < numQueries;
 }
 
 static inline GLuint
@@ -171,35 +100,33 @@ counterid_to_index(GLuint counterid)
    return counterid - 1;
 }
 
-/*****************************************************************************/
-
-/**
- * Returns how many bytes a counter's value takes up.
- */
-unsigned
-_mesa_perf_query_counter_size(const struct gl_perf_monitor_counter *c)
+static void
+output_clipped_string(GLchar *stringRet,
+                      GLuint stringMaxLen,
+                      const char *string)
 {
-   switch (c->Type) {
-   case GL_FLOAT:
-   case GL_PERCENTAGE_AMD:
-      return sizeof(GLfloat);
-   case GL_UNSIGNED_INT:
-      return sizeof(GLuint);
-   case GL_UNSIGNED_INT64_AMD:
-      return sizeof(uint64_t);
-   default:
-      assert(!"Should not get here: invalid counter type");
-      return 0;
-   }
+   if (!stringRet)
+      return;
+
+   strncpy(stringRet, string ? string : "", stringMaxLen);
+
+   /* No specification given about whether returned strings need
+    * to be zero-terminated. Zero-terminate the string always as we
+    * don't otherwise communicate the length of the returned
+    * string.
+    */
+   if (stringMaxLen > 0)
+      stringRet[stringMaxLen - 1] = '\0';
 }
 
+/*****************************************************************************/
+
 extern void GLAPIENTRY
 _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId)
 {
    GET_CURRENT_CONTEXT(ctx);
-   unsigned numGroups;
 
-   init_groups(ctx);
+   unsigned numQueries;
 
    /* The GL_INTEL_performance_query spec says:
     *
@@ -211,7 +138,7 @@ _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId)
       return;
    }
 
-   numGroups = ctx->PerfQuery.NumGroups;
+   numQueries = init_performance_query_info(ctx);
 
    /* The GL_INTEL_performance_query spec says:
     *
@@ -219,7 +146,7 @@ _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId)
     *    queries, then the value of 0 is returned and INVALID_OPERATION error
     *    is raised."
     */
-   if (numGroups == 0) {
+   if (numQueries == 0) {
       *queryId = 0;
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glGetFirstPerfQueryIdINTEL(no queries supported)");
@@ -233,7 +160,8 @@ extern void GLAPIENTRY
 _mesa_GetNextPerfQueryIdINTEL(GLuint queryId, GLuint *nextQueryId)
 {
    GET_CURRENT_CONTEXT(ctx);
-   init_groups(ctx);
+
+   unsigned numQueries;
 
    /* The GL_INTEL_performance_query spec says:
     *
@@ -251,29 +179,27 @@ _mesa_GetNextPerfQueryIdINTEL(GLuint queryId, GLuint *nextQueryId)
       return;
    }
 
-   if (!queryid_valid(ctx, queryId)) {
-      *nextQueryId = 0;
+   numQueries = init_performance_query_info(ctx);
+
+   if (!queryid_valid(ctx, numQueries, queryId)) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glGetNextPerfQueryIdINTEL(invalid query)");
       return;
    }
 
-   ++queryId;
-
-   if (!queryid_valid(ctx, queryId)) {
-      *nextQueryId = 0;
-   } else {
+   if (queryid_valid(ctx, numQueries, ++queryId))
       *nextQueryId = queryId;
-   }
+   else
+      *nextQueryId = 0;
 }
 
 extern void GLAPIENTRY
 _mesa_GetPerfQueryIdByNameINTEL(char *queryName, GLuint *queryId)
 {
    GET_CURRENT_CONTEXT(ctx);
-   unsigned i;
 
-   init_groups(ctx);
+   unsigned numQueries;
+   unsigned i;
 
    /* The GL_INTEL_performance_query spec says:
     *
@@ -286,15 +212,25 @@ _mesa_GetPerfQueryIdByNameINTEL(char *queryName, GLuint *queryId)
       return;
    }
 
-   /* The specification does not state that this produces an error. */
+   /* The specification does not state that this produces an error but
+    * to be consistent with glGetFirstPerfQueryIdINTEL we generate an
+    * INVALID_VALUE error
+    */
    if (!queryId) {
-      _mesa_warning(ctx, "glGetPerfQueryIdByNameINTEL(queryId == NULL)");
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetPerfQueryIdByNameINTEL(queryId == NULL)");
       return;
    }
 
-   for (i = 0; i < ctx->PerfQuery.NumGroups; ++i) {
-      const struct gl_perf_monitor_group *group_obj = get_group(ctx, i);
-      if (strcmp(group_obj->Name, queryName) == 0) {
+   numQueries = init_performance_query_info(ctx);
+
+   for (i = 0; i < numQueries; ++i) {
+      const GLchar *name;
+      GLuint ignore;
+
+      ctx->Driver.GetPerfQueryInfo(ctx, i, &name, &ignore, &ignore, &ignore);
+
+      if (strcmp(name, queryName) == 0) {
          *queryId = index_to_queryid(i);
          return;
       }
@@ -306,20 +242,22 @@ _mesa_GetPerfQueryIdByNameINTEL(char *queryName, GLuint *queryId)
 
 extern void GLAPIENTRY
 _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
-                            GLuint queryNameLength, char *queryName,
-                            GLuint *dataSize, GLuint *noCounters,
-                            GLuint *noActiveInstances,
+                            GLuint nameLength, GLchar *name,
+                            GLuint *dataSize,
+                            GLuint *numCounters,
+                            GLuint *numActive,
                             GLuint *capsMask)
 {
    GET_CURRENT_CONTEXT(ctx);
-   unsigned i;
-
-   const struct gl_perf_monitor_group *group_obj;
 
-   init_groups(ctx);
+   unsigned numQueries = init_performance_query_info(ctx);
+   unsigned queryIndex = queryid_to_index(queryId);
+   const char *queryName;
+   GLuint queryDataSize;
+   GLuint queryNumCounters;
+   GLuint queryNumActive;
 
-   group_obj = get_group(ctx, queryid_to_index(queryId));
-   if (group_obj == NULL) {
+   if (!queryid_valid(ctx, numQueries, queryId)) {
       /* The GL_INTEL_performance_query spec says:
        *
        *    "If queryId does not reference a valid query type, an
@@ -330,32 +268,19 @@ _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
       return;
    }
 
-   if (queryName) {
-      strncpy(queryName, group_obj->Name, queryNameLength);
+   ctx->Driver.GetPerfQueryInfo(ctx, queryIndex,
+                                &queryName,
+                                &queryDataSize,
+                                &queryNumCounters,
+                                &queryNumActive);
 
-      /* No specification given about whether the string needs to be
-       * zero-terminated. Zero-terminate the string always as we don't
-       * otherwise communicate the length of the returned string.
-       */
-      if (queryNameLength > 0) {
-         queryName[queryNameLength - 1] = '\0';
-      }
-   }
+   output_clipped_string(name, nameLength, queryName);
 
-   if (dataSize) {
-      unsigned size = 0;
-      for (i = 0; i < group_obj->NumCounters; ++i) {
-         /* What we get from the driver is group id (uint32_t) + counter id
-          * (uint32_t) + value.
-          */
-         size += 2 * sizeof(uint32_t) + _mesa_perf_query_counter_size(&group_obj->Counters[i]);
-      }
-      *dataSize = size;
-   }
+   if (dataSize)
+      *dataSize = queryDataSize;
 
-   if (noCounters) {
-      *noCounters = group_obj->NumCounters;
-   }
+   if (numCounters)
+      *numCounters = queryNumCounters;
 
    /* The GL_INTEL_performance_query spec says:
     *
@@ -366,130 +291,94 @@ _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
     * 2) Another typo in the specification, maxInstances parameter is not listed
     *    in the declaration of this function in the list of new functions.
     */
-   if (noActiveInstances) {
-      *noActiveInstances = _mesa_HashNumEntries(ctx->PerfQuery.Objects);
-   }
+   if (numActive)
+      *numActive = queryNumActive;
 
-   if (capsMask) {
-      /* TODO: This information not yet available in the monitor structs. For
-       * now, we hardcode SINGLE_CONTEXT, since that's what the implementation
-       * currently tries very hard to do.
-       */
+   /* Assume for now that all queries are per-context */
+   if (capsMask)
       *capsMask = GL_PERFQUERY_SINGLE_CONTEXT_INTEL;
-   }
 }
 
 extern void GLAPIENTRY
 _mesa_GetPerfCounterInfoINTEL(GLuint queryId, GLuint counterId,
-                              GLuint counterNameLength, char *counterName,
-                              GLuint counterDescLength, char *counterDesc,
-                              GLuint *counterOffset, GLuint *counterDataSize, GLuint *counterTypeEnum,
-                              GLuint *counterDataTypeEnum, GLuint64 *rawCounterMaxValue)
+                              GLuint nameLength, GLchar *name,
+                              GLuint descLength, GLchar *desc,
+                              GLuint *offset,
+                              GLuint *dataSize,
+                              GLuint *typeEnum,
+                              GLuint *dataTypeEnum,
+                              GLuint64 *rawCounterMaxValue)
 {
    GET_CURRENT_CONTEXT(ctx);
 
-   const struct gl_perf_monitor_group *group_obj;
-   const struct gl_perf_monitor_counter *counter_obj;
+   unsigned numQueries = init_performance_query_info(ctx);
+   unsigned queryIndex = queryid_to_index(queryId);
+   const char *queryName;
+   GLuint queryDataSize;
+   GLuint queryNumCounters;
+   GLuint queryNumActive;
    unsigned counterIndex;
-   unsigned i;
-
-   init_groups(ctx);
-
-   group_obj = get_group(ctx, queryid_to_index(queryId));
-
-   /* The GL_INTEL_performance_query spec says:
-    *
-    *    "If the pair of queryId and counterId does not reference a valid
-    *    counter, an INVALID_VALUE error is generated."
-    */
-   if (group_obj == NULL) {
+   const char *counterName;
+   const char *counterDesc;
+   GLuint counterOffset;
+   GLuint counterDataSize;
+   GLuint counterTypeEnum;
+   GLuint counterDataTypeEnum;
+   GLuint64 counterRawMax;
+
+   if (!queryid_valid(ctx, numQueries, queryId)) {
+      /* The GL_INTEL_performance_query spec says:
+       *
+       *    "If the pair of queryId and counterId does not reference a valid
+       *    counter, an INVALID_VALUE error is generated."
+       */
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glGetPerfCounterInfoINTEL(invalid queryId)");
       return;
    }
 
+   ctx->Driver.GetPerfQueryInfo(ctx, queryIndex,
+                                &queryName,
+                                &queryDataSize,
+                                &queryNumCounters,
+                                &queryNumActive);
+
    counterIndex = counterid_to_index(counterId);
-   counter_obj = get_counter(group_obj, counterIndex);
 
-   if (counter_obj == NULL) {
+   if (counterIndex < 0 || counterIndex >= queryNumCounters) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glGetPerfCounterInfoINTEL(invalid counterId)");
       return;
    }
 
-   if (counterName) {
-      strncpy(counterName, counter_obj->Name, counterNameLength);
-
-      /* No specification given about whether the string needs to be
-       * zero-terminated. Zero-terminate the string always as we don't
-       * otherwise communicate the length of the returned string.
-       */
-      if (counterNameLength > 0) {
-         counterName[counterNameLength - 1] = '\0';
-      }
-   }
+   ctx->Driver.GetPerfCounterInfo(ctx, queryIndex, counterIndex,
+                                  &counterName,
+                                  &counterDesc,
+                                  &counterOffset,
+                                  &counterDataSize,
+                                  &counterTypeEnum,
+                                  &counterDataTypeEnum,
+                                  &counterRawMax);
 
-   if (counterDesc) {
-      /* TODO: No separate description text at the moment. We pass the name
-       * again for the moment.
-       */
-      strncpy(counterDesc, counter_obj->Name, counterDescLength);
+   output_clipped_string(name, nameLength, counterName);
+   output_clipped_string(desc, descLength, counterDesc);
 
-      /* No specification given about whether the string needs to be
-       * zero-terminated. Zero-terminate the string always as we don't
-       * otherwise communicate the length of the returned string.
-       */
-      if (counterDescLength > 0) {
-         counterDesc[counterDescLength - 1] = '\0';
-      }
-   }
+   if (offset)
+      *offset = counterOffset;
 
-   if (counterOffset) {
-      unsigned offset = 0;
-      for (i = 0; i < counterIndex; ++i) {
-         /* What we get from the driver is group id (uint32_t) + counter id
-          * (uint32_t) + value.
-          */
-         offset += 2 * sizeof(uint32_t) + _mesa_perf_query_counter_size(&group_obj->Counters[i]);
-      }
-      *counterOffset = 2 * sizeof(uint32_t) + offset;
-   }
+   if (dataSize)
+      *dataSize = counterDataSize;
 
-   if (counterDataSize) {
-      *counterDataSize = _mesa_perf_query_counter_size(counter_obj);
-   }
+   if (typeEnum)
+      *typeEnum = counterTypeEnum;
 
-   if (counterTypeEnum) {
-      /* TODO: Different counter types (semantic type, not data type) not
-       * supported as of yet.
-       */
-      *counterTypeEnum = GL_PERFQUERY_COUNTER_RAW_INTEL;
-   }
+   if (dataTypeEnum)
+      *dataTypeEnum = counterDataTypeEnum;
 
-   if (counterDataTypeEnum) {
-      switch (counter_obj->Type) {
-      case GL_FLOAT:
-      case GL_PERCENTAGE_AMD:
-         *counterDataTypeEnum = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
-         break;
-      case GL_UNSIGNED_INT:
-         *counterDataTypeEnum = GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL;
-         break;
-      case GL_UNSIGNED_INT64_AMD:
-         *counterDataTypeEnum = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
-         break;
-      default:
-         assert(!"Should not get here: invalid counter type");
-         return;
-      }
-   }
+   if (rawCounterMaxValue)
+      *rawCounterMaxValue = counterRawMax;
 
    if (rawCounterMaxValue) {
-      /* This value is (implicitly) specified to be used only with
-       * GL_PERFQUERY_COUNTER_RAW_INTEL counters. When semantic types for
-       * counters are added, that needs to be checked.
-       */
-
       /* The GL_INTEL_performance_query spec says:
        *
        *    "for some raw counters for which the maximal value is
@@ -497,10 +386,17 @@ _mesa_GetPerfCounterInfoINTEL(GLuint queryId, GLuint counterId,
        *    returned in the location pointed by rawCounterMaxValue, otherwise,
        *    the location is written with the value of 0."
        *
-       * The maximum value reported by the driver at the moment is not with
-       * these semantics, so write 0 always to be safe.
+       *    Since it's very useful to be able to report a maximum value for
+       *    more than just counters using the _COUNTER_RAW_INTEL or
+       *    _COUNTER_DURATION_RAW_INTEL enums (e.g. for a _THROUGHPUT counter,
+       *    tools want to be able to visualize the absolute throughput with
+       *    respect to the theoretical maximum that's possible) and there
+       *    doesn't seem to be any reason not to allow _THROUGHPUT counters to
+       *    also be considered "raw" here, we always leave it up to the
+       *    backend to decide when it's appropriate to report a maximum
+       *    counter value or 0 if not.
        */
-      *rawCounterMaxValue = 0;
+      *rawCounterMaxValue = counterRawMax;
    }
 }
 
@@ -508,129 +404,106 @@ extern void GLAPIENTRY
 _mesa_CreatePerfQueryINTEL(GLuint queryId, GLuint *queryHandle)
 {
    GET_CURRENT_CONTEXT(ctx);
-   GLuint first;
-   GLuint group;
-   const struct gl_perf_monitor_group *group_obj;
-   struct gl_perf_monitor_object *m;
-   unsigned i;
-
-   init_groups(ctx);
-
-   /* This is not specified in the extension, but is the only sane thing to
-    * do.
-    */
-   if (queryHandle == NULL) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glCreatePerfQueryINTEL(queryHandle == NULL)");
-      return;
-   }
 
-   group = queryid_to_index(queryId);
-   group_obj = get_group(ctx, group);
+   unsigned numQueries = init_performance_query_info(ctx);
+   GLuint id;
+   struct gl_perf_query_object *obj;
 
    /* The GL_INTEL_performance_query spec says:
     *
     *    "If queryId does not reference a valid query type, an INVALID_VALUE
     *    error is generated."
     */
-   if (group_obj == NULL) {
+   if (!queryid_valid(ctx, numQueries, queryId)) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glCreatePerfQueryINTEL(invalid queryId)");
       return;
    }
 
-   /* The query object created here is the counterpart of a `monitor' in
-    * AMD_performance_monitor. This call is equivalent to calling
-    * GenPerfMonitorsAMD and SelectPerfMonitorCountersAMD with a list of all
-    * counters in a group.
+   /* This is not specified in the extension, but is the only sane thing to
+    * do.
     */
+   if (queryHandle == NULL) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glCreatePerfQueryINTEL(queryHandle == NULL)");
+      return;
+   }
 
-   /* We keep the query ids contiguous */
-   first = _mesa_HashFindFreeKeyBlock(ctx->PerfQuery.Objects, 1);
-   if (!first) {
+   id = _mesa_HashFindFreeKeyBlock(ctx->PerfQuery.Objects, 1);
+   if (!id) {
       /* The GL_INTEL_performance_query spec says:
        *
        *    "If the query instance cannot be created due to exceeding the
        *    number of allowed instances or driver fails query creation due to
        *    an insufficient memory reason, an OUT_OF_MEMORY error is
        *    generated, and the location pointed by queryHandle returns NULL."
-      */
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCreatePerfQueryINTEL");
+       */
+      _mesa_error_no_memory(__func__);
       return;
    }
 
-   m = new_performance_query(ctx, first);
-   if (m == NULL) {
+   obj = ctx->Driver.NewPerfQueryObject(ctx, queryid_to_index(queryId));
+   if (obj == NULL) {
       _mesa_error_no_memory(__func__);
       return;
    }
 
-   _mesa_HashInsert(ctx->PerfQuery.Objects, first, m);
-   *queryHandle = first;
-
-   ctx->Driver.ResetPerfMonitor(ctx, m);
+   obj->Id = id;
+   obj->Active = false;
+   obj->Ready = false;
 
-   for (i = 0; i < group_obj->NumCounters; ++i) {
-      ++m->ActiveGroups[group];
-      /* Counters are a continuous range of integers, 0 to NumCounters (excl),
-       * so i is the counter value to use here.
-       */
-      BITSET_SET(m->ActiveCounters[group], i);
-   }
+   _mesa_HashInsert(ctx->PerfQuery.Objects, id, obj);
+   *queryHandle = id;
 }
 
 extern void GLAPIENTRY
 _mesa_DeletePerfQueryINTEL(GLuint queryHandle)
 {
    GET_CURRENT_CONTEXT(ctx);
-   struct gl_perf_monitor_object *m;
 
-   /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
-    * id.
-    */
-   m = lookup_query(ctx, queryHandle);
+   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
 
    /* The GL_INTEL_performance_query spec says:
     *
     *    "If a query handle doesn't reference a previously created performance
     *    query instance, an INVALID_VALUE error is generated."
     */
-   if (m == NULL) {
+   if (obj == NULL) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glDeletePerfQueryINTEL(invalid queryHandle)");
       return;
    }
 
-   /* Let the driver stop the query if it's active. */
-   if (m->Active) {
-      ctx->Driver.ResetPerfMonitor(ctx, m);
-      m->Ended = false;
+   /* To avoid complications in the backend we never ask the backend to
+    * delete an active query or a query object while we are still
+    * waiting for data.
+    */
+
+   if (obj->Active)
+      _mesa_EndPerfQueryINTEL(queryHandle);
+
+   if (obj->Used && !obj->Ready) {
+      ctx->Driver.WaitPerfQuery(ctx, obj);
+      obj->Ready = true;
    }
 
    _mesa_HashRemove(ctx->PerfQuery.Objects, queryHandle);
-   ralloc_free(m->ActiveGroups);
-   ralloc_free(m->ActiveCounters);
-   ctx->Driver.DeletePerfMonitor(ctx, m);
+   ctx->Driver.DeletePerfQuery(ctx, obj);
 }
 
 extern void GLAPIENTRY
 _mesa_BeginPerfQueryINTEL(GLuint queryHandle)
 {
    GET_CURRENT_CONTEXT(ctx);
-   struct gl_perf_monitor_object *m;
-
-   /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
-    * id.
-    */
 
-   m = lookup_query(ctx, queryHandle);
+   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
 
    /* The GL_INTEL_performance_query spec says:
     *
     *    "If a query handle doesn't reference a previously created performance
     *    query instance, an INVALID_VALUE error is generated."
     */
-   if (m == NULL) {
+   if (obj == NULL) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glBeginPerfQueryINTEL(invalid queryHandle)");
       return;
@@ -646,15 +519,25 @@ _mesa_BeginPerfQueryINTEL(GLuint queryHandle)
     * We also generate an INVALID_OPERATION error if the driver can't begin
     * a query for its own reasons, and for nesting the same query.
     */
-   if (m->Active) {
+   if (obj->Active) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glBeginPerfQueryINTEL(already active)");
       return;
    }
 
-   if (ctx->Driver.BeginPerfMonitor(ctx, m)) {
-      m->Active = true;
-      m->Ended = false;
+   /* To avoid complications in the backend we never ask the backend to
+    * reuse a query object and begin a new query while we are still
+    * waiting for data on that object.
+    */
+   if (obj->Used && !obj->Ready) {
+      ctx->Driver.WaitPerfQuery(ctx, obj);
+      obj->Ready = true;
+   }
+
+   if (ctx->Driver.BeginPerfQuery(ctx, obj)) {
+      obj->Used = true;
+      obj->Active = true;
+      obj->Ready = false;
    } else {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glBeginPerfQueryINTEL(driver unable to begin query)");
@@ -665,39 +548,32 @@ extern void GLAPIENTRY
 _mesa_EndPerfQueryINTEL(GLuint queryHandle)
 {
    GET_CURRENT_CONTEXT(ctx);
-   struct gl_perf_monitor_object *m;
 
-   /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
-    * id.
-    */
+   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
 
-   m = lookup_query(ctx, queryHandle);
+   /* Not explicitly covered in the spec, but for consistency... */
+   if (obj == NULL) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glEndPerfQueryINTEL(invalid queryHandle)");
+      return;
+   }
 
    /* The GL_INTEL_performance_query spec says:
     *
     *    "If a performance query is not currently started, an
     *    INVALID_OPERATION error will be generated."
-    *
-    * The specification doesn't state that an invalid handle would be an
-    * INVALID_VALUE error. Regardless, query for such a handle will not be
-    * started, so we generate an INVALID_OPERATION in that case too.
     */
-   if (m == NULL) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glEndPerfQueryINTEL(invalid queryHandle)");
-      return;
-   }
 
-   if (!m->Active) {
+   if (!obj->Active) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glEndPerfQueryINTEL(not active)");
       return;
    }
 
-   ctx->Driver.EndPerfMonitor(ctx, m);
+   ctx->Driver.EndPerfQuery(ctx, obj);
 
-   m->Active = false;
-   m->Ended = true;
+   obj->Active = false;
+   obj->Ready = false;
 }
 
 extern void GLAPIENTRY
@@ -705,8 +581,15 @@ _mesa_GetPerfQueryDataINTEL(GLuint queryHandle, GLuint flags,
                             GLsizei dataSize, void *data, GLuint *bytesWritten)
 {
    GET_CURRENT_CONTEXT(ctx);
-   struct gl_perf_monitor_object *m;
-   bool result_available;
+
+   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
+
+   /* Spec doesn't cover invalid handles here; error as Begin/Delete do. */
+   if (obj == NULL) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetPerfQueryDataINTEL(invalid queryHandle)");
+      return;
+   }
 
    /* The GL_INTEL_performance_query spec says:
     *
@@ -719,66 +602,33 @@ _mesa_GetPerfQueryDataINTEL(GLuint queryHandle, GLuint flags,
       return;
    }
 
-   /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
-    * id.
-    */
-
-   m = lookup_query(ctx, queryHandle);
-
-   /* The specification doesn't state that an invalid handle generates an
-    * error. We could interpret that to mean the case should be handled as
-    * "measurement not ready for this query", but what should be done if
-    * `flags' equals PERFQUERY_WAIT_INTEL?
-    *
-    * To resolve this, we just generate an INVALID_VALUE from an invalid query
-    * handle.
+   /* Just for good measure in case a lazy application is only
+    * checking this and not checking for errors...
     */
-   if (m == NULL) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glGetPerfQueryDataINTEL(invalid queryHandle)");
-      return;
-   }
+   *bytesWritten = 0;
 
-   /* We need at least enough room for a single value. */
-   if (dataSize < sizeof(GLuint)) {
-      *bytesWritten = 0;
-      return;
-   }
-
-   /* The GL_INTEL_performance_query spec says:
-    *
-    *    "The call may end without returning any data if they are not ready
-    *    for reading as the measurement session is still pending (the
-    *    EndPerfQueryINTEL() command processing is not finished by
-    *    hardware). In this case location pointed by the bytesWritten
-    *    parameter will be set to 0."
-    *
-    * If EndPerfQueryINTEL() is not called at all, we follow this.
+   /* Not explicitly covered in the spec, but to be consistent with
+    * EndPerfQuery, which validates that an application only ends an
+    * active query, we also validate that an application doesn't try
+    * to get the data for a still-active query...
     */
-   if (!m->Ended) {
-      *bytesWritten = 0;
+   if (obj->Active) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetPerfQueryDataINTEL(query still active)");
       return;
    }
 
-   result_available = ctx->Driver.IsPerfMonitorResultAvailable(ctx, m);
+   obj->Ready = ctx->Driver.IsPerfQueryReady(ctx, obj);
 
-   if (!result_available) {
+   if (!obj->Ready) {
       if (flags == GL_PERFQUERY_FLUSH_INTEL) {
          ctx->Driver.Flush(ctx);
       } else if (flags == GL_PERFQUERY_WAIT_INTEL) {
-         /* Assume Finish() is both enough and not too much to wait for
-          * results. If results are still not available after Finish(), the
-          * later code automatically bails out with 0 for bytesWritten.
-          */
-         ctx->Driver.Finish(ctx);
-         result_available =
-            ctx->Driver.IsPerfMonitorResultAvailable(ctx, m);
+         ctx->Driver.WaitPerfQuery(ctx, obj);
+         obj->Ready = true;
       }
    }
 
-   if (result_available) {
-      ctx->Driver.GetPerfMonitorResult(ctx, m, dataSize, data, (GLint*)bytesWritten);
-   } else {
-      *bytesWritten = 0;
-   }
+   if (obj->Ready)
+      ctx->Driver.GetPerfQueryData(ctx, obj, dataSize, data, bytesWritten);
 }
diff --git a/src/mesa/main/performance_query.h b/src/mesa/main/performance_query.h
index 3fed5eae1b8..8268f0ef194 100644
--- a/src/mesa/main/performance_query.h
+++ b/src/mesa/main/performance_query.h
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2012 Intel Corporation
+ * Copyright © 2012,2015 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -38,10 +38,6 @@ _mesa_init_performance_queries(struct gl_context *ctx);
 extern void
 _mesa_free_performance_queries(struct gl_context *ctx);
 
-unsigned
-_mesa_perf_query_counter_size(const struct gl_perf_monitor_counter *);
-
-
 extern void GLAPIENTRY
 _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId);
 
-- 
cgit v1.2.3