/**********************************************************
 * Copyright 2008-2015 VMware, Inc.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **********************************************************/

#include "pipe/p_state.h"
#include "pipe/p_context.h"

#include "util/u_bitmask.h"
#include "util/u_memory.h"

#include "svga_cmd.h"
#include "svga_context.h"
#include "svga_screen.h"
#include "svga_resource_buffer.h"
#include "svga_winsys.h"
#include "svga_debug.h"


/* Fixme: want a public base class for all pipe structs, even if there
 * isn't much in them.
 */
struct pipe_query {
   int dummy;
};

struct svga_query {
   struct pipe_query base;
   unsigned type;                  /**< PIPE_QUERY_x or SVGA_QUERY_x */
   SVGA3dQueryType svga_type;      /**< SVGA3D_QUERYTYPE_x or unused */

   unsigned id;                    /** Per-context query identifier */

   struct pipe_fence_handle *fence;

   /** For PIPE_QUERY_OCCLUSION_COUNTER / SVGA3D_QUERYTYPE_OCCLUSION */

   /* For VGPU9 */
   struct svga_winsys_buffer *hwbuf;
   volatile SVGA3dQueryResult *queryResult;

   /** For VGPU10 */
   struct svga_winsys_gb_query *gb_query;
   SVGA3dDXQueryFlags flags;
   unsigned offset;                /**< offset to the gb_query memory */
   struct pipe_query *predicate;   /** The associated query that can be used for predicate */

   /** For non-GPU SVGA_QUERY_x queries */
   uint64_t begin_count, end_count;
};


/** cast wrapper */
static inline struct svga_query *
svga_query( struct pipe_query *q )
{
   return (struct svga_query *)q;
}


static boolean
svga_get_query_result(struct pipe_context *pipe,
                      struct pipe_query *q,
                      boolean wait,
                      union pipe_query_result *result);

static enum pipe_error
define_query_vgpu9(struct svga_context *svga,
                   struct svga_query *sq)
{
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;

   sq->hwbuf = svga_winsys_buffer_create(svga, 1,
                                         SVGA_BUFFER_USAGE_PINNED,
                                         sizeof *sq->queryResult);
   if (!sq->hwbuf)
      return PIPE_ERROR_OUT_OF_MEMORY;

   sq->queryResult = (SVGA3dQueryResult *)
                     sws->buffer_map(sws, sq->hwbuf, PIPE_TRANSFER_WRITE);
   if (!sq->queryResult) {
      sws->buffer_destroy(sws, sq->hwbuf);
      return PIPE_ERROR_OUT_OF_MEMORY;
   }

   sq->queryResult->totalSize = sizeof *sq->queryResult;
   sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;

   /* We request the buffer to be pinned and assume it is always mapped.
    * The reason is that we don't want to wait for fences when checking the
    * query status.
    */
   sws->buffer_unmap(sws, sq->hwbuf);

   return PIPE_OK;
}

static enum pipe_error
begin_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
{
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
   enum pipe_error ret = PIPE_OK;

   if (sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) {
      /* The application doesn't care for the pending query result.
       * We cannot let go of the existing buffer and just get a new one
       * because its storage may be reused for other purposes and clobbered
       * by the host when it determines the query result.  So the only
       * option here is to wait for the existing query's result -- not a
       * big deal, given that no sane application would do this.
       */
       uint64_t result;
       svga_get_query_result(&svga->pipe, &sq->base, TRUE, (void*)&result);
       assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING);
   }

   sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
   sws->fence_reference(sws, &sq->fence, NULL);

   ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
   if (ret != PIPE_OK) {
      svga_context_flush(svga, NULL);
      ret = SVGA3D_BeginQuery(svga->swc, sq->svga_type);
   }
   return ret;
}

static enum pipe_error
end_query_vgpu9(struct svga_context *svga, struct svga_query *sq)
{
   enum pipe_error ret = PIPE_OK;

   /* Set to PENDING before sending EndQuery. */
   sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING;

   ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
   if (ret != PIPE_OK) {
      svga_context_flush(svga, NULL);
      ret = SVGA3D_EndQuery(svga->swc, sq->svga_type, sq->hwbuf);
   }
   return ret;
}

static boolean
get_query_result_vgpu9(struct svga_context *svga, struct svga_query *sq,
                       boolean wait, uint64_t *result)
{
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
   enum pipe_error ret;
   SVGA3dQueryState state;

   if (!sq->fence) {
      /* The query status won't be updated by the host unless
       * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause
       * a synchronous wait on the host.
       */
      ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
      if (ret != PIPE_OK) {
         svga_context_flush(svga, NULL);
         ret = SVGA3D_WaitForQuery(svga->swc, sq->svga_type, sq->hwbuf);
      }
      assert (ret == PIPE_OK);
      svga_context_flush(svga, &sq->fence);
      assert(sq->fence);
   }

   state = sq->queryResult->state;
   if (state == SVGA3D_QUERYSTATE_PENDING) {
      if (!wait)
         return FALSE;
      sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
      state = sq->queryResult->state;
   }

   assert(state == SVGA3D_QUERYSTATE_SUCCEEDED ||
          state == SVGA3D_QUERYSTATE_FAILED);

   *result = (uint64_t)sq->queryResult->result32;
   return TRUE;
}


/**
 * VGPU10
 *
 * There is one query mob allocated for each context to be shared by all
 * query types. The mob is used to hold queries's state and result. Since
 * each query result type is of different length, to ease the query allocation
 * management, the mob is divided into memory blocks. Each memory block
 * will hold queries of the same type. Multiple memory blocks can be allocated
 * for a particular query type.
 *
 * Currently each memory block is of 184 bytes. We support up to 128
 * memory blocks. The query memory size is arbitrary right now.
 * Each occlusion query takes about 8 bytes. One memory block can accomodate
 * 23 occlusion queries. 128 of those blocks can support up to 2944 occlusion
 * queries. That seems reasonable for now. If we think this limit is
 * not enough, we can increase the limit or try to grow the mob in runtime.
 * Note, SVGA device does not impose one mob per context for queries,
 * we could allocate multiple mobs for queries; however, wddm KMD does not
 * currently support that.
 *
 * Also note that the GL guest driver does not issue any of the
 * following commands: DXMoveQuery, DXBindAllQuery & DXReadbackAllQuery.
 */
#define SVGA_QUERY_MEM_BLOCK_SIZE    (sizeof(SVGADXQueryResultUnion) * 2)
#define SVGA_QUERY_MEM_SIZE          (128 * SVGA_QUERY_MEM_BLOCK_SIZE)

struct svga_qmem_alloc_entry
{
   unsigned start_offset;               /* start offset of the memory block */
   unsigned block_index;                /* block index of the memory block */
   unsigned query_size;                 /* query size in this memory block */
   unsigned nquery;                     /* number of queries allocated */
   struct util_bitmask *alloc_mask;     /* allocation mask */
   struct svga_qmem_alloc_entry *next;  /* next memory block */
};


/**
 * Allocate a memory block from the query object memory
 * \return -1 if out of memory, else index of the query memory block
 */
static int
allocate_query_block(struct svga_context *svga)
{
   int index;
   unsigned offset;

   /* Find the next available query block */
   index = util_bitmask_add(svga->gb_query_alloc_mask);

   if (index == UTIL_BITMASK_INVALID_INDEX)
      return -1;

   offset = index * SVGA_QUERY_MEM_BLOCK_SIZE;
   if (offset >= svga->gb_query_len) {
      unsigned i;

      /**
       * All the memory blocks are allocated, lets see if there is
       * any empty memory block around that can be freed up.
       */
      index = -1;
      for (i = 0; i < SVGA_QUERY_MAX && index == -1; i++) {
         struct svga_qmem_alloc_entry *alloc_entry;
         struct svga_qmem_alloc_entry *prev_alloc_entry = NULL;

         alloc_entry = svga->gb_query_map[i];
         while (alloc_entry && index == -1) {
            if (alloc_entry->nquery == 0) {
               /* This memory block is empty, it can be recycled. */
               if (prev_alloc_entry) {
                  prev_alloc_entry->next = alloc_entry->next;
               } else {
                  svga->gb_query_map[i] = alloc_entry->next;
               }
               index = alloc_entry->block_index;
            } else {
               prev_alloc_entry = alloc_entry;
               alloc_entry = alloc_entry->next;
            }
         }
      }
   }

   return index;
}

/**
 * Allocate a slot in the specified memory block.
 * All slots in this memory block are of the same size.
 *
 * \return -1 if out of memory, else index of the query slot
 */
static int
allocate_query_slot(struct svga_context *svga,
                    struct svga_qmem_alloc_entry *alloc)
{
   int index;
   unsigned offset;

   /* Find the next available slot */
   index = util_bitmask_add(alloc->alloc_mask);

   if (index == UTIL_BITMASK_INVALID_INDEX)
      return -1;

   offset = index * alloc->query_size;
   if (offset >= SVGA_QUERY_MEM_BLOCK_SIZE)
      return -1;

   alloc->nquery++;

   return index;
}

/**
 * Deallocate the specified slot in the memory block.
 * If all slots are freed up, then deallocate the memory block
 * as well, so it can be allocated for other query type
 */
static void
deallocate_query_slot(struct svga_context *svga,
                      struct svga_qmem_alloc_entry *alloc,
                      unsigned index)
{
   assert(index != UTIL_BITMASK_INVALID_INDEX);

   util_bitmask_clear(alloc->alloc_mask, index);
   alloc->nquery--;

   /**
    * Don't worry about deallocating the empty memory block here.
    * The empty memory block will be recycled when no more memory block
    * can be allocated.
    */
}

static struct svga_qmem_alloc_entry *
allocate_query_block_entry(struct svga_context *svga,
                           unsigned len)
{
   struct svga_qmem_alloc_entry *alloc_entry;
   int block_index = -1;

   block_index = allocate_query_block(svga);
   if (block_index == -1)
      return NULL;
   alloc_entry = CALLOC_STRUCT(svga_qmem_alloc_entry);
   if (!alloc_entry)
      return NULL;

   alloc_entry->block_index = block_index;
   alloc_entry->start_offset = block_index * SVGA_QUERY_MEM_BLOCK_SIZE;
   alloc_entry->nquery = 0;
   alloc_entry->alloc_mask = util_bitmask_create();
   alloc_entry->next = NULL;
   alloc_entry->query_size = len;

   return alloc_entry;
}

/**
 * Allocate a memory slot for a query of the specified type.
 * It will first search through the memory blocks that are allocated
 * for the query type. If no memory slot is available, it will try
 * to allocate another memory block within the query object memory for
 * this query type.
 */
static int
allocate_query(struct svga_context *svga,
               SVGA3dQueryType type,
               unsigned len)
{
   struct svga_qmem_alloc_entry *alloc_entry;
   int slot_index = -1;
   unsigned offset;

   assert(type < SVGA_QUERY_MAX);

   alloc_entry = svga->gb_query_map[type];

   if (!alloc_entry) {
      /**
       * No query memory block has been allocated for this query type,
       * allocate one now
       */
      alloc_entry = allocate_query_block_entry(svga, len);
      if (!alloc_entry)
         return -1;
      svga->gb_query_map[type] = alloc_entry;
   }

   /* Allocate a slot within the memory block allocated for this query type */
   slot_index = allocate_query_slot(svga, alloc_entry);

   if (slot_index == -1) {
      /* This query memory block is full, allocate another one */
      alloc_entry = allocate_query_block_entry(svga, len);
      if (!alloc_entry)
         return -1;
      alloc_entry->next = svga->gb_query_map[type];
      svga->gb_query_map[type] = alloc_entry;
      slot_index = allocate_query_slot(svga, alloc_entry);
   }

   assert(slot_index != -1);
   offset = slot_index * len + alloc_entry->start_offset;

   return offset;
}


/**
 * Deallocate memory slot allocated for the specified query
 */
static void
deallocate_query(struct svga_context *svga,
                 struct svga_query *sq)
{
   struct svga_qmem_alloc_entry *alloc_entry;
   unsigned slot_index;
   unsigned offset = sq->offset;

   alloc_entry = svga->gb_query_map[sq->svga_type];

   while (alloc_entry) {
      if (offset >= alloc_entry->start_offset &&
          offset < alloc_entry->start_offset + SVGA_QUERY_MEM_BLOCK_SIZE) {

         /* The slot belongs to this memory block, deallocate it */
         slot_index = (offset - alloc_entry->start_offset) /
                      alloc_entry->query_size;
         deallocate_query_slot(svga, alloc_entry, slot_index);
         alloc_entry = NULL;
      } else {
         alloc_entry = alloc_entry->next;
      }
   }
}


/**
 * Destroy the gb query object and all the related query structures
 */
static void
destroy_gb_query_obj(struct svga_context *svga)
{
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
   unsigned i;

   for (i = 0; i < SVGA_QUERY_MAX; i++) {
      struct svga_qmem_alloc_entry *alloc_entry, *next;
      alloc_entry = svga->gb_query_map[i];
      while (alloc_entry) {
         next = alloc_entry->next;
         util_bitmask_destroy(alloc_entry->alloc_mask);
         FREE(alloc_entry);
         alloc_entry = next;
      }
      svga->gb_query_map[i] = NULL;
   }

   if (svga->gb_query)
      sws->query_destroy(sws, svga->gb_query);
   svga->gb_query = NULL;

   util_bitmask_destroy(svga->gb_query_alloc_mask);
}

/**
 * Define query and create the gb query object if it is not already created.
 * There is only one gb query object per context which will be shared by
 * queries of all types.
 */
static enum pipe_error
define_query_vgpu10(struct svga_context *svga,
                    struct svga_query *sq, int resultLen)
{
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
   int qlen;
   enum pipe_error ret = PIPE_OK;

   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);

   if (svga->gb_query == NULL) {
      /* Create a gb query object */
      svga->gb_query = sws->query_create(sws, SVGA_QUERY_MEM_SIZE);
      if (!svga->gb_query)
         return PIPE_ERROR_OUT_OF_MEMORY;
      svga->gb_query_len = SVGA_QUERY_MEM_SIZE;
      memset (svga->gb_query_map, 0, sizeof(svga->gb_query_map));
      svga->gb_query_alloc_mask = util_bitmask_create();

      /* Bind the query object to the context */
      if (svga->swc->query_bind(svga->swc, svga->gb_query,
                                SVGA_QUERY_FLAG_SET) != PIPE_OK) {
         svga_context_flush(svga, NULL);
         svga->swc->query_bind(svga->swc, svga->gb_query,
                               SVGA_QUERY_FLAG_SET);
      }
   }

   sq->gb_query = svga->gb_query;

   /* Allocate an integer ID for this query */
   sq->id = util_bitmask_add(svga->query_id_bm);
   if (sq->id == UTIL_BITMASK_INVALID_INDEX)
      return PIPE_ERROR_OUT_OF_MEMORY;

   /* Find a slot for this query in the gb object */
   qlen = resultLen + sizeof(SVGA3dQueryState);
   sq->offset = allocate_query(svga, sq->svga_type, qlen);
   if (sq->offset == -1)
      return PIPE_ERROR_OUT_OF_MEMORY;

   SVGA_DBG(DEBUG_QUERY, "   query type=%d qid=0x%x offset=%d\n",
            sq->svga_type, sq->id, sq->offset);

   /**
    * Send SVGA3D commands to define the query
    */
   ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
   if (ret != PIPE_OK) {
      svga_context_flush(svga, NULL);
      ret = SVGA3D_vgpu10_DefineQuery(svga->swc, sq->id, sq->svga_type, sq->flags);
   }
   if (ret != PIPE_OK)
      return PIPE_ERROR_OUT_OF_MEMORY;

   ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
   if (ret != PIPE_OK) {
      svga_context_flush(svga, NULL);
      ret = SVGA3D_vgpu10_BindQuery(svga->swc, sq->gb_query, sq->id);
   }
   assert(ret == PIPE_OK);

   ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
   if (ret != PIPE_OK) {
      svga_context_flush(svga, NULL);
      ret = SVGA3D_vgpu10_SetQueryOffset(svga->swc, sq->id, sq->offset);
   }
   assert(ret == PIPE_OK);

   return PIPE_OK;
}

static enum pipe_error
destroy_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
{
   enum pipe_error ret;

   ret = SVGA3D_vgpu10_DestroyQuery(svga->swc, sq->id);

   /* Deallocate the memory slot allocated for this query */
   deallocate_query(svga, sq);

   return ret;
}


/**
 * Rebind queryies to the context.
 */
static void
rebind_vgpu10_query(struct svga_context *svga)
{
   if (svga->swc->query_bind(svga->swc, svga->gb_query,
                             SVGA_QUERY_FLAG_REF) != PIPE_OK) {
      svga_context_flush(svga, NULL);
      svga->swc->query_bind(svga->swc, svga->gb_query,
                            SVGA_QUERY_FLAG_REF);
   }

   svga->rebind.flags.query = FALSE;
}


static enum pipe_error
begin_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
{
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
   enum pipe_error ret = PIPE_OK;
   int status = 0;

   sws->fence_reference(sws, &sq->fence, NULL);

   /* Initialize the query state to NEW */
   status = sws->query_init(sws, sq->gb_query, sq->offset, SVGA3D_QUERYSTATE_NEW);
   if (status)
      return PIPE_ERROR;

   if (svga->rebind.flags.query) {
      rebind_vgpu10_query(svga);
   }

   /* Send the BeginQuery command to the device */
   ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id);
   if (ret != PIPE_OK) {
      svga_context_flush(svga, NULL);
      ret = SVGA3D_vgpu10_BeginQuery(svga->swc, sq->id);
   }
   return ret;
}

static enum pipe_error
end_query_vgpu10(struct svga_context *svga, struct svga_query *sq)
{
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
   enum pipe_error ret = PIPE_OK;

   if (svga->rebind.flags.query) {
      rebind_vgpu10_query(svga);
   }

   ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id);
   if (ret != PIPE_OK) {
      svga_context_flush(svga, NULL);
      ret = SVGA3D_vgpu10_EndQuery(svga->swc, sq->id);
   }

   /* Finish fence is copied here from get_query_result_vgpu10. This helps
    * with cases where svga_begin_query might be called again before
    * svga_get_query_result, such as GL_TIME_ELAPSED.
    */
   if (!sq->fence) {
      svga_context_flush(svga, &sq->fence);
   }
   sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);

   return ret;
}

static boolean
get_query_result_vgpu10(struct svga_context *svga, struct svga_query *sq,
                        boolean wait, void *result, int resultLen)
{
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
   SVGA3dQueryState queryState;

   if (svga->rebind.flags.query) {
      rebind_vgpu10_query(svga);
   }

   sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen);

   if (queryState == SVGA3D_QUERYSTATE_PENDING) {
      if (!wait)
         return FALSE;
      sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
      sws->query_get_result(sws, sq->gb_query, sq->offset, &queryState, result, resultLen);
   }

   assert(queryState == SVGA3D_QUERYSTATE_SUCCEEDED ||
          queryState == SVGA3D_QUERYSTATE_FAILED);

   return TRUE;
}

static struct pipe_query *
svga_create_query(struct pipe_context *pipe,
                  unsigned query_type,
                  unsigned index)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_query *sq;

   assert(query_type < SVGA_QUERY_MAX);

   sq = CALLOC_STRUCT(svga_query);
   if (!sq)
      goto fail;

   /* Allocate an integer ID for the query */
   sq->id = util_bitmask_add(svga->query_id_bm);
   if (sq->id == UTIL_BITMASK_INVALID_INDEX)
      goto fail;

   SVGA_DBG(DEBUG_QUERY, "%s type=%d sq=0x%x id=%d\n", __FUNCTION__,
            query_type, sq, sq->id);

   switch (query_type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSION;
      if (svga_have_vgpu10(svga)) {
         define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionQueryResult));

         /**
          * In OpenGL, occlusion counter query can be used in conditional
          * rendering; however, in DX10, only OCCLUSION_PREDICATE query can
          * be used for predication. Hence, we need to create an occlusion
          * predicate query along with the occlusion counter query. So when
          * the occlusion counter query is used for predication, the associated
          * query of occlusion predicate type will be used
          * in the SetPredication command.
          */
         sq->predicate = svga_create_query(pipe, PIPE_QUERY_OCCLUSION_PREDICATE, index);

      } else {
         define_query_vgpu9(svga, sq);
      }
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      assert(svga_have_vgpu10(svga));
      sq->svga_type = SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE;
      define_query_vgpu10(svga, sq, sizeof(SVGADXOcclusionPredicateQueryResult));
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   case PIPE_QUERY_SO_STATISTICS:
      assert(svga_have_vgpu10(svga));
      sq->svga_type = SVGA3D_QUERYTYPE_STREAMOUTPUTSTATS;
      define_query_vgpu10(svga, sq,
                          sizeof(SVGADXStreamOutStatisticsQueryResult));
      break;
   case PIPE_QUERY_TIMESTAMP:
      assert(svga_have_vgpu10(svga));
      sq->svga_type = SVGA3D_QUERYTYPE_TIMESTAMP;
      define_query_vgpu10(svga, sq,
                          sizeof(SVGADXTimestampQueryResult));
      break;
   case SVGA_QUERY_NUM_DRAW_CALLS:
   case SVGA_QUERY_NUM_FALLBACKS:
   case SVGA_QUERY_NUM_FLUSHES:
   case SVGA_QUERY_NUM_VALIDATIONS:
   case SVGA_QUERY_MAP_BUFFER_TIME:
   case SVGA_QUERY_NUM_RESOURCES_MAPPED:
   case SVGA_QUERY_NUM_BYTES_UPLOADED:
   case SVGA_QUERY_MEMORY_USED:
   case SVGA_QUERY_NUM_SHADERS:
   case SVGA_QUERY_NUM_RESOURCES:
   case SVGA_QUERY_NUM_STATE_OBJECTS:
   case SVGA_QUERY_NUM_SURFACE_VIEWS:
   case SVGA_QUERY_NUM_GENERATE_MIPMAP:
      break;
   default:
      assert(!"unexpected query type in svga_create_query()");
   }

   sq->type = query_type;

   return &sq->base;

fail:
   FREE(sq);
   return NULL;
}

static void
svga_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
   struct svga_query *sq;

   if (!q) {
      destroy_gb_query_obj(svga);
      return;
   }

   sq = svga_query(q);

   SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
            sq, sq->id);

   switch (sq->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      if (svga_have_vgpu10(svga)) {
         /* make sure to also destroy any associated predicate query */
         if (sq->predicate)
            svga_destroy_query(pipe, sq->predicate);
         destroy_query_vgpu10(svga, sq);
      } else {
         sws->buffer_destroy(sws, sq->hwbuf);
      }
      sws->fence_reference(sws, &sq->fence, NULL);
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      assert(svga_have_vgpu10(svga));
      destroy_query_vgpu10(svga, sq);
      sws->fence_reference(sws, &sq->fence, NULL);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   case PIPE_QUERY_SO_STATISTICS:
   case PIPE_QUERY_TIMESTAMP:
      assert(svga_have_vgpu10(svga));
      destroy_query_vgpu10(svga, sq);
      sws->fence_reference(sws, &sq->fence, NULL);
      break;
   case SVGA_QUERY_NUM_DRAW_CALLS:
   case SVGA_QUERY_NUM_FALLBACKS:
   case SVGA_QUERY_NUM_FLUSHES:
   case SVGA_QUERY_NUM_VALIDATIONS:
   case SVGA_QUERY_MAP_BUFFER_TIME:
   case SVGA_QUERY_NUM_RESOURCES_MAPPED:
   case SVGA_QUERY_NUM_BYTES_UPLOADED:
   case SVGA_QUERY_MEMORY_USED:
   case SVGA_QUERY_NUM_SHADERS:
   case SVGA_QUERY_NUM_RESOURCES:
   case SVGA_QUERY_NUM_STATE_OBJECTS:
   case SVGA_QUERY_NUM_SURFACE_VIEWS:
   case SVGA_QUERY_NUM_GENERATE_MIPMAP:
      /* nothing */
      break;
   default:
      assert(!"svga: unexpected query type in svga_destroy_query()");
   }

   /* Free the query id */
   util_bitmask_clear(svga->query_id_bm, sq->id);

   FREE(sq);
}


static boolean
svga_begin_query(struct pipe_context *pipe, struct pipe_query *q)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_query *sq = svga_query(q);
   enum pipe_error ret;

   assert(sq);
   assert(sq->type < SVGA_QUERY_MAX);

   SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
            sq, sq->id);

   /* Need to flush out buffered drawing commands so that they don't
    * get counted in the query results.
    */
   svga_hwtnl_flush_retry(svga);

   switch (sq->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      if (svga_have_vgpu10(svga)) {
         ret = begin_query_vgpu10(svga, sq);
         /* also need to start the associated occlusion predicate query */
         if (sq->predicate) {
            enum pipe_error status;
            status = begin_query_vgpu10(svga, svga_query(sq->predicate));
            assert(status == PIPE_OK);
            (void) status;
         }
      } else {
         ret = begin_query_vgpu9(svga, sq);
      }
      assert(ret == PIPE_OK);
      (void) ret;
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      assert(svga_have_vgpu10(svga));
      ret = begin_query_vgpu10(svga, sq);
      assert(ret == PIPE_OK);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   case PIPE_QUERY_SO_STATISTICS:
   case PIPE_QUERY_TIMESTAMP:
      assert(svga_have_vgpu10(svga));
      ret = begin_query_vgpu10(svga, sq);
      assert(ret == PIPE_OK);
      break;
   case SVGA_QUERY_NUM_DRAW_CALLS:
      sq->begin_count = svga->hud.num_draw_calls;
      break;
   case SVGA_QUERY_NUM_FALLBACKS:
      sq->begin_count = svga->hud.num_fallbacks;
      break;
   case SVGA_QUERY_NUM_FLUSHES:
      sq->begin_count = svga->hud.num_flushes;
      break;
   case SVGA_QUERY_NUM_VALIDATIONS:
      sq->begin_count = svga->hud.num_validations;
      break;
   case SVGA_QUERY_MAP_BUFFER_TIME:
      sq->begin_count = svga->hud.map_buffer_time;
      break;
   case SVGA_QUERY_NUM_RESOURCES_MAPPED:
      sq->begin_count = svga->hud.num_resources_mapped;
      break;
   case SVGA_QUERY_NUM_BYTES_UPLOADED:
      sq->begin_count = svga->hud.num_bytes_uploaded;
      break;
   case SVGA_QUERY_MEMORY_USED:
   case SVGA_QUERY_NUM_SHADERS:
   case SVGA_QUERY_NUM_RESOURCES:
   case SVGA_QUERY_NUM_STATE_OBJECTS:
   case SVGA_QUERY_NUM_SURFACE_VIEWS:
   case SVGA_QUERY_NUM_GENERATE_MIPMAP:
      /* nothing */
      break;
   default:
      assert(!"unexpected query type in svga_begin_query()");
   }

   svga->sq[sq->type] = sq;

   return true;
}


static void
svga_end_query(struct pipe_context *pipe, struct pipe_query *q)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_query *sq = svga_query(q);
   enum pipe_error ret;

   assert(sq);
   assert(sq->type < SVGA_QUERY_MAX);

   SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d\n", __FUNCTION__,
            sq, sq->id);

   if (sq->type == PIPE_QUERY_TIMESTAMP && svga->sq[sq->type] != sq)
      svga_begin_query(pipe, q);

   svga_hwtnl_flush_retry(svga);

   assert(svga->sq[sq->type] == sq);

   switch (sq->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      if (svga_have_vgpu10(svga)) {
         ret = end_query_vgpu10(svga, sq);
         /* also need to end the associated occlusion predicate query */
         if (sq->predicate) {
            enum pipe_error status;
            status = end_query_vgpu10(svga, svga_query(sq->predicate));
            assert(status == PIPE_OK);
            (void) status;
         }
      } else {
         ret = end_query_vgpu9(svga, sq);
      }
      assert(ret == PIPE_OK);
      (void) ret;
      /* TODO: Delay flushing. We don't really need to flush here, just ensure
       * that there is one flush before svga_get_query_result attempts to get
       * the result.
       */
      svga_context_flush(svga, NULL);
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE:
      assert(svga_have_vgpu10(svga));
      ret = end_query_vgpu10(svga, sq);
      assert(ret == PIPE_OK);
      break;
   case PIPE_QUERY_PRIMITIVES_GENERATED:
   case PIPE_QUERY_PRIMITIVES_EMITTED:
   case PIPE_QUERY_SO_STATISTICS:
   case PIPE_QUERY_TIMESTAMP:
      assert(svga_have_vgpu10(svga));
      ret = end_query_vgpu10(svga, sq);
      assert(ret == PIPE_OK);
      break;
   case SVGA_QUERY_NUM_DRAW_CALLS:
      sq->end_count = svga->hud.num_draw_calls;
      break;
   case SVGA_QUERY_NUM_FALLBACKS:
      sq->end_count = svga->hud.num_fallbacks;
      break;
   case SVGA_QUERY_NUM_FLUSHES:
      sq->end_count = svga->hud.num_flushes;
      break;
   case SVGA_QUERY_NUM_VALIDATIONS:
      sq->end_count = svga->hud.num_validations;
      break;
   case SVGA_QUERY_MAP_BUFFER_TIME:
      sq->end_count = svga->hud.map_buffer_time;
      break;
   case SVGA_QUERY_NUM_RESOURCES_MAPPED:
      sq->end_count = svga->hud.num_resources_mapped;
      break;
   case SVGA_QUERY_NUM_BYTES_UPLOADED:
      sq->end_count = svga->hud.num_bytes_uploaded;
      break;
   case SVGA_QUERY_MEMORY_USED:
   case SVGA_QUERY_NUM_SHADERS:
   case SVGA_QUERY_NUM_RESOURCES:
   case SVGA_QUERY_NUM_STATE_OBJECTS:
   case SVGA_QUERY_NUM_SURFACE_VIEWS:
   case SVGA_QUERY_NUM_GENERATE_MIPMAP:
      /* nothing */
      break;
   default:
      assert(!"unexpected query type in svga_end_query()");
   }
   svga->sq[sq->type] = NULL;
}


static boolean
svga_get_query_result(struct pipe_context *pipe,
                      struct pipe_query *q,
                      boolean wait,
                      union pipe_query_result *vresult)
{
   struct svga_screen *svgascreen = svga_screen(pipe->screen);
   struct svga_context *svga = svga_context(pipe);
   struct svga_query *sq = svga_query(q);
   uint64_t *result = (uint64_t *)vresult;
   boolean ret = TRUE;

   assert(sq);

   SVGA_DBG(DEBUG_QUERY, "%s sq=0x%x id=%d wait: %d\n",
            __FUNCTION__, sq, sq->id, wait);

   switch (sq->type) {
   case PIPE_QUERY_OCCLUSION_COUNTER:
      if (svga_have_vgpu10(svga)) {
         SVGADXOcclusionQueryResult occResult;
         ret = get_query_result_vgpu10(svga, sq, wait,
                                       (void *)&occResult, sizeof(occResult));
         *result = (uint64_t)occResult.samplesRendered;
      } else {
         ret = get_query_result_vgpu9(svga, sq, wait, (uint64_t *)result);
      }
      break;
   case PIPE_QUERY_OCCLUSION_PREDICATE: {
      SVGADXOcclusionPredicateQueryResult occResult;
      assert(svga_have_vgpu10(svga));
      ret = get_query_result_vgpu10(svga, sq, wait,
                                    (void *)&occResult, sizeof(occResult));
      vresult->b = occResult.anySamplesRendered != 0;
      break;
   }
   case PIPE_QUERY_SO_STATISTICS: {
      SVGADXStreamOutStatisticsQueryResult sResult;
      struct pipe_query_data_so_statistics *pResult =
         (struct pipe_query_data_so_statistics *)vresult;

      assert(svga_have_vgpu10(svga));
      ret = get_query_result_vgpu10(svga, sq, wait,
                                    (void *)&sResult, sizeof(sResult));
      pResult->num_primitives_written = sResult.numPrimitivesWritten;
      pResult->primitives_storage_needed = sResult.numPrimitivesRequired;
      break;
   }
   case PIPE_QUERY_TIMESTAMP: {
      SVGADXTimestampQueryResult sResult;

      assert(svga_have_vgpu10(svga));
      ret = get_query_result_vgpu10(svga, sq, wait,
                                    (void *)&sResult, sizeof(sResult));
      *result = (uint64_t)sResult.timestamp;
      break;
   }
   case PIPE_QUERY_PRIMITIVES_GENERATED: {
      SVGADXStreamOutStatisticsQueryResult sResult;

      assert(svga_have_vgpu10(svga));
      ret = get_query_result_vgpu10(svga, sq, wait,
                                    (void *)&sResult, sizeof sResult);
      *result = (uint64_t)sResult.numPrimitivesRequired;
      break;
   }
   case PIPE_QUERY_PRIMITIVES_EMITTED: {
      SVGADXStreamOutStatisticsQueryResult sResult;

      assert(svga_have_vgpu10(svga));
      ret = get_query_result_vgpu10(svga, sq, wait,
                                    (void *)&sResult, sizeof sResult);
      *result = (uint64_t)sResult.numPrimitivesWritten;
      break;
   }
   /* These are per-frame counters */
   case SVGA_QUERY_NUM_DRAW_CALLS:
   case SVGA_QUERY_NUM_FALLBACKS:
   case SVGA_QUERY_NUM_FLUSHES:
   case SVGA_QUERY_NUM_VALIDATIONS:
   case SVGA_QUERY_MAP_BUFFER_TIME:
   case SVGA_QUERY_NUM_RESOURCES_MAPPED:
   case SVGA_QUERY_NUM_BYTES_UPLOADED:
      vresult->u64 = sq->end_count - sq->begin_count;
      break;
   /* These are running total counters */
   case SVGA_QUERY_MEMORY_USED:
      vresult->u64 = svgascreen->hud.total_resource_bytes;
      break;
   case SVGA_QUERY_NUM_SHADERS:
      vresult->u64 = svga->hud.num_shaders;
      break;
   case SVGA_QUERY_NUM_RESOURCES:
      vresult->u64 = svgascreen->hud.num_resources;
      break;
   case SVGA_QUERY_NUM_STATE_OBJECTS:
      vresult->u64 = svga->hud.num_state_objects;
      break;
   case SVGA_QUERY_NUM_SURFACE_VIEWS:
      vresult->u64 = svga->hud.num_surface_views;
      break;
   case SVGA_QUERY_NUM_GENERATE_MIPMAP:
      vresult->u64 = svga->hud.num_generate_mipmap;
      break;
   default:
      assert(!"unexpected query type in svga_get_query_result");
   }

   SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, *((uint64_t *)vresult));

   return ret;
}

static void
svga_render_condition(struct pipe_context *pipe, struct pipe_query *q,
                      boolean condition, uint mode)
{
   struct svga_context *svga = svga_context(pipe);
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
   struct svga_query *sq = svga_query(q);
   SVGA3dQueryId queryId;
   enum pipe_error ret;

   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);

   assert(svga_have_vgpu10(svga));
   if (sq == NULL) {
      queryId = SVGA3D_INVALID_ID;
   }
   else {
      assert(sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION ||
             sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSIONPREDICATE);

      if (sq->svga_type == SVGA3D_QUERYTYPE_OCCLUSION) {
         assert(sq->predicate);
         /**
          * For conditional rendering, make sure to use the associated
          * predicate query.
          */
         sq = svga_query(sq->predicate);
      }
      queryId = sq->id;

      if ((mode == PIPE_RENDER_COND_WAIT ||
           mode == PIPE_RENDER_COND_BY_REGION_WAIT) && sq->fence) {
         sws->fence_finish(sws, sq->fence, SVGA_FENCE_FLAG_QUERY);
      }
   }

   ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
                                      (uint32) condition);
   if (ret != PIPE_OK) {
      svga_context_flush(svga, NULL);
      ret = SVGA3D_vgpu10_SetPredication(svga->swc, queryId,
                                         (uint32) condition);
   }
}


/*
 * This function is a workaround because we lack the ability to query
 * renderer's time synchornously.
 */
static uint64_t
svga_get_timestamp(struct pipe_context *pipe)
{
   struct pipe_query *q = svga_create_query(pipe, PIPE_QUERY_TIMESTAMP, 0);
   union pipe_query_result result;

   svga_begin_query(pipe, q);
   svga_end_query(pipe,q);
   svga_get_query_result(pipe, q, TRUE, &result);
   svga_destroy_query(pipe, q);

   return result.u64;
}


void
svga_init_query_functions(struct svga_context *svga)
{
   svga->pipe.create_query = svga_create_query;
   svga->pipe.destroy_query = svga_destroy_query;
   svga->pipe.begin_query = svga_begin_query;
   svga->pipe.end_query = svga_end_query;
   svga->pipe.get_query_result = svga_get_query_result;
   svga->pipe.render_condition = svga_render_condition;
   svga->pipe.get_timestamp = svga_get_timestamp;
}