/*
 * Copyright 2014, 2015 Red Hat.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "virgl_context.h"
#include "virgl_resource.h"
#include "virgl_screen.h"

/* A (soft) limit for the amount of memory we want to allow for queued staging
 * resources. This is used to decide when we should force a flush, in order to
 * avoid exhausting virtio-gpu memory.
 */
#define VIRGL_QUEUED_STAGING_RES_SIZE_LIMIT (128 * 1024 * 1024)

/* We need to flush to properly sync the transfer with the current cmdbuf.
 * But there are cases where the flushing can be skipped:
 *
 *  - synchronization is disabled
 *  - the resource is not referenced by the current cmdbuf
 */
static bool virgl_res_needs_flush(struct virgl_context *vctx,
                                  struct virgl_transfer *trans)
{
   struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
   struct virgl_resource *res = virgl_resource(trans->base.resource);

   if (trans->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED)
      return false;

   if (!vws->res_is_referenced(vws, vctx->cbuf, res->hw_res))
      return false;

   return true;
}

/* We need to read back from the host storage to make sure the guest storage
 * is up-to-date.  But there are cases where the readback can be skipped:
 *
 *  - the content can be discarded
 *  - the host storage is read-only
 *
 * Note that PIPE_TRANSFER_WRITE without discard bits requires readback.
 * PIPE_TRANSFER_READ becomes irrelevant.  PIPE_TRANSFER_UNSYNCHRONIZED and
 * PIPE_TRANSFER_FLUSH_EXPLICIT are also irrelevant.
 */
static bool virgl_res_needs_readback(struct virgl_context *vctx,
                                     struct virgl_resource *res,
                                     unsigned usage, unsigned level)
{
   if (usage & (PIPE_TRANSFER_DISCARD_RANGE |
                PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE))
      return false;

   if (res->clean_mask & (1 << level))
      return false;

   return true;
}

enum virgl_transfer_map_type
virgl_resource_transfer_prepare(struct virgl_context *vctx,
                                struct virgl_transfer *xfer)
{
   struct virgl_screen *vs = virgl_screen(vctx->base.screen);
   struct virgl_winsys *vws = vs->vws;
   struct virgl_resource *res = virgl_resource(xfer->base.resource);
   enum virgl_transfer_map_type map_type = VIRGL_TRANSFER_MAP_HW_RES;
   bool flush;
   bool readback;
   bool wait;

   /* there is no way to map the host storage currently */
   if (xfer->base.usage & PIPE_TRANSFER_MAP_DIRECTLY)
      return VIRGL_TRANSFER_MAP_ERROR;

   /* We break the logic down into four steps
    *
    * step 1: determine the required operations independently
    * step 2: look for chances to skip the operations
    * step 3: resolve dependencies between the operations
    * step 4: execute the operations
    */

   flush = virgl_res_needs_flush(vctx, xfer);
   readback = virgl_res_needs_readback(vctx, res, xfer->base.usage,
                                       xfer->base.level);
   /* We need to wait for all cmdbufs, current or previous, that access the
    * resource to finish unless synchronization is disabled.
    */
   wait = !(xfer->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED);

   /* When the transfer range consists of only uninitialized data, we can
    * assume the GPU is not accessing the range and readback is unnecessary.
    * We can proceed as if PIPE_TRANSFER_UNSYNCHRONIZED and
    * PIPE_TRANSFER_DISCARD_RANGE are set.
    */
   if (res->u.b.target == PIPE_BUFFER &&
         !util_ranges_intersect(&res->valid_buffer_range, xfer->base.box.x,
            xfer->base.box.x + xfer->base.box.width)) {
      flush = false;
      readback = false;
      wait = false;
   }

   /* When the resource is busy but its content can be discarded, we can
    * replace its HW resource or use a staging buffer to avoid waiting.
    */
   if (wait && (xfer->base.usage & (PIPE_TRANSFER_DISCARD_RANGE |
                                    PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE))) {
      bool can_realloc = false;
      bool can_staging = false;

      /* A PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE transfer may be followed by
       * PIPE_TRANSFER_UNSYNCHRONIZED transfers to non-overlapping regions.
       * It cannot be treated as a PIPE_TRANSFER_DISCARD_RANGE transfer,
       * otherwise those following unsynchronized transfers may overwrite
       * valid data.
       */
      if (xfer->base.usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         can_realloc = virgl_can_rebind_resource(vctx, &res->u.b);
      } else {
         can_staging = vctx->transfer_uploader &&
                       !vctx->transfer_uploader_in_use;
      }

      /* discard implies no readback */
      assert(!readback);

      if (can_realloc || can_staging) {
         /* Both map types have some costs.  Do them only when the resource is
          * (or will be) busy for real.  Otherwise, set wait to false.
          */
         wait = (flush || vws->resource_is_busy(vws, res->hw_res));
         if (wait) {
            map_type = (can_realloc) ?
               VIRGL_TRANSFER_MAP_REALLOC :
               VIRGL_TRANSFER_MAP_STAGING;
            wait = false;

            /* There is normally no need to flush either, unless the amount of
             * memory we are using for staging resources starts growing, in
             * which case we want to flush to keep our memory consumption in
             * check.
             */
            flush = (vctx->queued_staging_res_size >
               VIRGL_QUEUED_STAGING_RES_SIZE_LIMIT);
         }
      }
   }

   /* readback has some implications */
   if (readback) {
      /* Readback is yet another command and is transparent to the state
       * trackers.  It should be waited for in all cases, including when
       * PIPE_TRANSFER_UNSYNCHRONIZED is set.
       */
      wait = true;

      /* When the transfer queue has pending writes to this transfer's region,
       * we have to flush before readback.
       */
      if (!flush && virgl_transfer_queue_is_queued(&vctx->queue, xfer))
         flush = true;
   }

   if (flush)
      vctx->base.flush(&vctx->base, NULL, 0);

   /* If we are not allowed to block, and we know that we will have to wait,
    * either because the resource is busy, or because it will become busy due
    * to a readback, return early to avoid performing an incomplete
    * transfer_get. Such an incomplete transfer_get may finish at any time,
    * during which another unsynchronized map could write to the resource
    * contents, leaving the contents in an undefined state.
    */
   if ((xfer->base.usage & PIPE_TRANSFER_DONTBLOCK) &&
       (readback || (wait && vws->resource_is_busy(vws, res->hw_res))))
      return VIRGL_TRANSFER_MAP_ERROR;

   if (readback) {
      vws->transfer_get(vws, res->hw_res, &xfer->base.box, xfer->base.stride,
                        xfer->l_stride, xfer->offset, xfer->base.level);
   }

   if (wait)
      vws->resource_wait(vws, res->hw_res);

   return map_type;
}

static struct pipe_resource *virgl_resource_create(struct pipe_screen *screen,
                                                   const struct pipe_resource *templ)
{
   unsigned vbind;
   struct virgl_screen *vs = virgl_screen(screen);
   struct virgl_resource *res = CALLOC_STRUCT(virgl_resource);

   res->u.b = *templ;
   res->u.b.screen = &vs->base;
   pipe_reference_init(&res->u.b.reference, 1);
   vbind = pipe_to_virgl_bind(vs, templ->bind, templ->flags);
   virgl_resource_layout(&res->u.b, &res->metadata);

   if ((vs->caps.caps.v2.capability_bits & VIRGL_CAP_APP_TWEAK_SUPPORT) &&
       vs->tweak_gles_emulate_bgra &&
      (templ->format == PIPE_FORMAT_B8G8R8A8_SRGB ||
        templ->format == PIPE_FORMAT_B8G8R8A8_UNORM ||
        templ->format == PIPE_FORMAT_B8G8R8X8_SRGB ||
        templ->format == PIPE_FORMAT_B8G8R8X8_UNORM)) {
      vbind |= VIRGL_BIND_PREFER_EMULATED_BGRA;
   }

   res->hw_res = vs->vws->resource_create(vs->vws, templ->target,
                                          templ->format, vbind,
                                          templ->width0,
                                          templ->height0,
                                          templ->depth0,
                                          templ->array_size,
                                          templ->last_level,
                                          templ->nr_samples,
                                          res->metadata.total_size);
   if (!res->hw_res) {
      FREE(res);
      return NULL;
   }

   res->clean_mask = (1 << VR_MAX_TEXTURE_2D_LEVELS) - 1;

   if (templ->target == PIPE_BUFFER) {
      util_range_init(&res->valid_buffer_range);
      virgl_buffer_init(res);
   } else {
      virgl_texture_init(res);
   }

   return &res->u.b;

}

static struct pipe_resource *virgl_resource_from_handle(struct pipe_screen *screen,
                                                        const struct pipe_resource *templ,
                                                        struct winsys_handle *whandle,
                                                        unsigned usage)
{
   struct virgl_screen *vs = virgl_screen(screen);
   if (templ->target == PIPE_BUFFER)
      return NULL;

   struct virgl_resource *res = CALLOC_STRUCT(virgl_resource);
   res->u.b = *templ;
   res->u.b.screen = &vs->base;
   pipe_reference_init(&res->u.b.reference, 1);

   res->hw_res = vs->vws->resource_create_from_handle(vs->vws, whandle);
   if (!res->hw_res) {
      FREE(res);
      return NULL;
   }

   virgl_texture_init(res);

   return &res->u.b;
}

void virgl_init_screen_resource_functions(struct pipe_screen *screen)
{
    screen->resource_create = virgl_resource_create;
    screen->resource_from_handle = virgl_resource_from_handle;
    screen->resource_get_handle = u_resource_get_handle_vtbl;
    screen->resource_destroy = u_resource_destroy_vtbl;
}

static bool virgl_buffer_transfer_extend(struct pipe_context *ctx,
                                         struct pipe_resource *resource,
                                         unsigned usage,
                                         const struct pipe_box *box,
                                         const void *data)
{
   struct virgl_context *vctx = virgl_context(ctx);
   struct virgl_resource *vbuf = virgl_resource(resource);
   struct virgl_transfer dummy_trans = { 0 };
   bool flush;
   struct virgl_transfer *queued;

   /*
    * Attempts to short circuit the entire process of mapping and unmapping
    * a resource if there is an existing transfer that can be extended.
    * Pessimestically falls back if a flush is required.
    */
   dummy_trans.base.resource = resource;
   dummy_trans.base.usage = usage;
   dummy_trans.base.box = *box;
   dummy_trans.base.stride = vbuf->metadata.stride[0];
   dummy_trans.base.layer_stride = vbuf->metadata.layer_stride[0];
   dummy_trans.offset = box->x;

   flush = virgl_res_needs_flush(vctx, &dummy_trans);
   if (flush && util_ranges_intersect(&vbuf->valid_buffer_range,
                                      box->x, box->x + box->width))
      return false;

   queued = virgl_transfer_queue_extend(&vctx->queue, &dummy_trans);
   if (!queued || !queued->hw_res_map)
      return false;

   memcpy(queued->hw_res_map + dummy_trans.offset, data, box->width);
   util_range_add(&vbuf->valid_buffer_range, box->x, box->x + box->width);

   return true;
}

static void virgl_buffer_subdata(struct pipe_context *pipe,
                                 struct pipe_resource *resource,
                                 unsigned usage, unsigned offset,
                                 unsigned size, const void *data)
{
   struct pipe_transfer *transfer;
   uint8_t *map;
   struct pipe_box box;

   assert(!(usage & PIPE_TRANSFER_READ));

   /* the write flag is implicit by the nature of buffer_subdata */
   usage |= PIPE_TRANSFER_WRITE;

   if (offset == 0 && size == resource->width0)
      usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
   else
      usage |= PIPE_TRANSFER_DISCARD_RANGE;

   u_box_1d(offset, size, &box);

   if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
       virgl_buffer_transfer_extend(pipe, resource, usage, &box, data))
      return;

   map = pipe->transfer_map(pipe, resource, 0, usage, &box, &transfer);
   if (map) {
      memcpy(map, data, size);
      pipe_transfer_unmap(pipe, transfer);
   }
}

void virgl_init_context_resource_functions(struct pipe_context *ctx)
{
    ctx->transfer_map = u_transfer_map_vtbl;
    ctx->transfer_flush_region = u_transfer_flush_region_vtbl;
    ctx->transfer_unmap = u_transfer_unmap_vtbl;
    ctx->buffer_subdata = virgl_buffer_subdata;
    ctx->texture_subdata = u_default_texture_subdata;
}

void virgl_resource_layout(struct pipe_resource *pt,
                           struct virgl_resource_metadata *metadata)
{
   unsigned level, nblocksy;
   unsigned width = pt->width0;
   unsigned height = pt->height0;
   unsigned depth = pt->depth0;
   unsigned buffer_size = 0;

   for (level = 0; level <= pt->last_level; level++) {
      unsigned slices;

      if (pt->target == PIPE_TEXTURE_CUBE)
         slices = 6;
      else if (pt->target == PIPE_TEXTURE_3D)
         slices = depth;
      else
         slices = pt->array_size;

      nblocksy = util_format_get_nblocksy(pt->format, height);
      metadata->stride[level] = util_format_get_stride(pt->format, width);
      metadata->layer_stride[level] = nblocksy * metadata->stride[level];
      metadata->level_offset[level] = buffer_size;

      buffer_size += slices * metadata->layer_stride[level];

      width = u_minify(width, 1);
      height = u_minify(height, 1);
      depth = u_minify(depth, 1);
   }

   if (pt->nr_samples <= 1)
      metadata->total_size = buffer_size;
   else /* don't create guest backing store for MSAA */
      metadata->total_size = 0;
}

struct virgl_transfer *
virgl_resource_create_transfer(struct virgl_context *vctx,
                               struct pipe_resource *pres,
                               const struct virgl_resource_metadata *metadata,
                               unsigned level, unsigned usage,
                               const struct pipe_box *box)
{
   struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;
   struct virgl_transfer *trans;
   enum pipe_format format = pres->format;
   const unsigned blocksy = box->y / util_format_get_blockheight(format);
   const unsigned blocksx = box->x / util_format_get_blockwidth(format);

   unsigned offset = metadata->level_offset[level];
   if (pres->target == PIPE_TEXTURE_CUBE ||
       pres->target == PIPE_TEXTURE_CUBE_ARRAY ||
       pres->target == PIPE_TEXTURE_3D ||
       pres->target == PIPE_TEXTURE_2D_ARRAY) {
      offset += box->z * metadata->layer_stride[level];
   }
   else if (pres->target == PIPE_TEXTURE_1D_ARRAY) {
      offset += box->z * metadata->stride[level];
      assert(box->y == 0);
   } else if (pres->target == PIPE_BUFFER) {
      assert(box->y == 0 && box->z == 0);
   } else {
      assert(box->z == 0);
   }

   offset += blocksy * metadata->stride[level];
   offset += blocksx * util_format_get_blocksize(format);

   trans = slab_alloc(&vctx->transfer_pool);
   if (!trans)
      return NULL;

   /* note that trans is not zero-initialized */
   trans->base.resource = NULL;
   pipe_resource_reference(&trans->base.resource, pres);
   trans->hw_res = NULL;
   vws->resource_reference(vws, &trans->hw_res, virgl_resource(pres)->hw_res);

   trans->base.level = level;
   trans->base.usage = usage;
   trans->base.box = *box;
   trans->base.stride = metadata->stride[level];
   trans->base.layer_stride = metadata->layer_stride[level];
   trans->offset = offset;
   util_range_init(&trans->range);
   trans->copy_src_res = NULL;
   trans->copy_src_offset = 0;

   if (trans->base.resource->target != PIPE_TEXTURE_3D &&
       trans->base.resource->target != PIPE_TEXTURE_CUBE &&
       trans->base.resource->target != PIPE_TEXTURE_1D_ARRAY &&
       trans->base.resource->target != PIPE_TEXTURE_2D_ARRAY &&
       trans->base.resource->target != PIPE_TEXTURE_CUBE_ARRAY)
      trans->l_stride = 0;
   else
      trans->l_stride = trans->base.layer_stride;

   return trans;
}

void virgl_resource_destroy_transfer(struct virgl_context *vctx,
                                     struct virgl_transfer *trans)
{
   struct virgl_winsys *vws = virgl_screen(vctx->base.screen)->vws;

   pipe_resource_reference(&trans->copy_src_res, NULL);

   util_range_destroy(&trans->range);
   vws->resource_reference(vws, &trans->hw_res, NULL);
   pipe_resource_reference(&trans->base.resource, NULL);
   slab_free(&vctx->transfer_pool, trans);
}

void virgl_resource_destroy(struct pipe_screen *screen,
                            struct pipe_resource *resource)
{
   struct virgl_screen *vs = virgl_screen(screen);
   struct virgl_resource *res = virgl_resource(resource);

   if (res->u.b.target == PIPE_BUFFER)
      util_range_destroy(&res->valid_buffer_range);

   vs->vws->resource_reference(vs->vws, &res->hw_res, NULL);
   FREE(res);
}

boolean virgl_resource_get_handle(struct pipe_screen *screen,
                                  struct pipe_resource *resource,
                                  struct winsys_handle *whandle)
{
   struct virgl_screen *vs = virgl_screen(screen);
   struct virgl_resource *res = virgl_resource(resource);

   if (res->u.b.target == PIPE_BUFFER)
      return FALSE;

   return vs->vws->resource_get_handle(vs->vws, res->hw_res,
                                       res->metadata.stride[0],
                                       whandle);
}

void virgl_resource_dirty(struct virgl_resource *res, uint32_t level)
{
   if (res) {
      if (res->u.b.target == PIPE_BUFFER)
         res->clean_mask &= ~1;
      else
         res->clean_mask &= ~(1 << level);
   }
}

/* Calculate the minimum size of the memory required to service a resource
 * transfer map. Also return the stride and layer_stride for the corresponding
 * layout.
 */
static unsigned virgl_transfer_map_size(struct virgl_transfer *vtransfer,
                                        unsigned *out_stride,
                                        unsigned *out_layer_stride)
{
   struct pipe_resource *pres = vtransfer->base.resource;
   struct pipe_box *box = &vtransfer->base.box;
   unsigned stride;
   unsigned layer_stride;
   unsigned size;

   assert(out_stride);
   assert(out_layer_stride);

   stride = util_format_get_stride(pres->format, box->width);
   layer_stride = util_format_get_2d_size(pres->format, stride, box->height);

   if (pres->target == PIPE_TEXTURE_CUBE ||
       pres->target == PIPE_TEXTURE_CUBE_ARRAY ||
       pres->target == PIPE_TEXTURE_3D ||
       pres->target == PIPE_TEXTURE_2D_ARRAY) {
      size = box->depth * layer_stride;
   } else if (pres->target == PIPE_TEXTURE_1D_ARRAY) {
      size = box->depth * stride;
   } else {
      size = layer_stride;
   }

   *out_stride = stride;
   *out_layer_stride = layer_stride;

   return size;
}

/* Maps a region from the transfer uploader to service the transfer. */
void *virgl_transfer_uploader_map(struct virgl_context *vctx,
                                  struct virgl_transfer *vtransfer)
{
   struct virgl_resource *vres = virgl_resource(vtransfer->base.resource);
   unsigned size;
   unsigned align_offset;
   unsigned stride;
   unsigned layer_stride;
   void *map_addr;

   assert(vctx->transfer_uploader);
   assert(!vctx->transfer_uploader_in_use);

   size = virgl_transfer_map_size(vtransfer, &stride, &layer_stride);

   /* For buffers we need to ensure that the start of the buffer would be
    * aligned to VIRGL_MAP_BUFFER_ALIGNMENT, even if our transfer doesn't
    * actually include it. To achieve this we may need to allocate a slightly
    * larger range from the upload buffer, and later update the uploader
    * resource offset and map address to point to the requested x coordinate
    * within that range.
    *
    * 0       A       2A      3A
    * |-------|---bbbb|bbbbb--|
    *             |--------|    ==> size
    *         |---|             ==> align_offset
    *         |------------|    ==> allocation of size + align_offset
    */
   align_offset = vres->u.b.target == PIPE_BUFFER ?
                  vtransfer->base.box.x % VIRGL_MAP_BUFFER_ALIGNMENT :
                  0;

   u_upload_alloc(vctx->transfer_uploader, 0, size + align_offset,
                  VIRGL_MAP_BUFFER_ALIGNMENT,
                  &vtransfer->copy_src_offset,
                  &vtransfer->copy_src_res, &map_addr);
   if (map_addr) {
      /* Update source offset and address to point to the requested x coordinate
       * if we have an align_offset (see above for more information). */
      vtransfer->copy_src_offset += align_offset;
      map_addr += align_offset;

      /* Mark as dirty, since we are updating the host side resource
       * without going through the corresponding guest side resource, and
       * hence the two will diverge.
       */
      virgl_resource_dirty(vres, vtransfer->base.level);

      /* The pointer returned by u_upload_alloc already has +offset
       * applied. */
      vctx->transfer_uploader_in_use = true;

      /* We are using the minimum required size to hold the contents,
       * possibly using a layout different from the layout of the resource,
       * so update the transfer strides accordingly.
       */
      vtransfer->base.stride = stride;
      vtransfer->base.layer_stride = layer_stride;

      /* Track the total size of active staging resources. */
      vctx->queued_staging_res_size += size + align_offset;
   }

   return map_addr;
}

bool
virgl_resource_realloc(struct virgl_context *vctx, struct virgl_resource *res)
{
   struct virgl_screen *vs = virgl_screen(vctx->base.screen);
   const struct pipe_resource *templ = &res->u.b;
   unsigned vbind;
   struct virgl_hw_res *hw_res;

   vbind = pipe_to_virgl_bind(vs, templ->bind, templ->flags);
   hw_res = vs->vws->resource_create(vs->vws,
                                     templ->target,
                                     templ->format,
                                     vbind,
                                     templ->width0,
                                     templ->height0,
                                     templ->depth0,
                                     templ->array_size,
                                     templ->last_level,
                                     templ->nr_samples,
                                     res->metadata.total_size);
   if (!hw_res)
      return false;

   vs->vws->resource_reference(vs->vws, &res->hw_res, NULL);
   res->hw_res = hw_res;

   util_range_set_empty(&res->valid_buffer_range);

   /* count toward the staging resource size limit */
   vctx->queued_staging_res_size += res->metadata.total_size;

   virgl_rebind_resource(vctx, &res->u.b);

   return true;
}