i965: Implemente a tiled fast-path for glReadPixels and glGetTexImage

Added intel_readpixels_tiled_mempcpy and intel_gettexsubimage_tiled_mempcpy functions. These are the fast paths for glReadPixels and glGetTexImage. On chrome, using the RoboHornet 2D Canvas toDataURL test, this patch cuts amount of time spent in glReadPixels by more than half and reduces the time of the entire test by 10%. v2: Jason Ekstrand <[email protected]> - Refactor to make the functions look more like the old intel_tex_subimage_tiled_memcpy - Don't export the readpixels_tiled_memcpy function - Fix some pointer arithmatic bugs in partial image downloads (using ReadPixels with a non-zero x or y offset) - Fix a bug when ReadPixels is performed on an FBO wrapping a texture miplevel other than zero. v3: Jason Ekstrand <[email protected]> - Better documentation fot the *_tiled_memcpy functions - Add target restrictions for renderbuffers wrapping textures v4: Jason Ekstrand <[email protected]> - Only check the return value of brw_bo_map for error and not bo->virtual v5: Jason Ekstrand <[email protected]> - Don't unnecessarily repeat a comment Signed-off-by: Jason Ekstrand <[email protected]> Reviewed-by: Chad Versace <[email protected]>
author: Sisinty Sasmita Patra <[email protected]> 2014-12-12 13:03:21 -0800
committer: Jason Ekstrand <[email protected]> 2015-01-26 17:29:35 -0800
commit: 90bd943f2abafd5e3ea29ab8397fd4682ef74170 (patch)
tree: bdc65b26aeb1d1c8e1adc78496c7d89e44a72663
parent: b52959c6022509514deb672e53c710069eed0237 (diff)
3 files changed, 271 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index 0330f7eaaf0..babf8ca0b18 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -39,21 +39,156 @@
 
 #include "brw_context.h"
 #include "intel_screen.h"
+#include "intel_batchbuffer.h"
 #include "intel_blit.h"
 #include "intel_buffers.h"
 #include "intel_fbo.h"
 #include "intel_mipmap_tree.h"
 #include "intel_pixel.h"
 #include "intel_buffer_objects.h"
+#include "intel_tiled_memcpy.h"
 
 #define FILE_DEBUG_FLAG DEBUG_PIXEL
 
+/**
+ * \brief A fast path for glReadPixels
+ *
+ * This fast path is taken when the source format is BGRA, RGBA,
+ * A or L and when the texture memory is X- or Y-tiled.  It downloads
+ * the source data by directly mapping the memory without a GTT fence.
+ * This then needs to be de-tiled on the CPU before presenting the data to
+ * the user in the linear fasion.
+ *
+ * This is a performance win over the conventional texture download path.
+ * In the conventional texture download path, the texture is either mapped
+ * through the GTT or copied to a linear buffer with the blitter before
+ * handing off to a software path.  This allows us to avoid round-tripping
+ * through the GPU (in the case where we would be blitting) and do only a
+ * single copy operation.
+ */
+static bool
+intel_readpixels_tiled_memcpy(struct gl_context * ctx,
+                              GLint xoffset, GLint yoffset,
+                              GLsizei width, GLsizei height,
+                              GLenum format, GLenum type,
+                              GLvoid * pixels,
+                              const struct gl_pixelstore_attrib *pack)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorReadBuffer;
+
+   /* This path supports reading from color buffers only */
+   if (rb == NULL)
+      return false;
+
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+   int dst_pitch;
+
+   /* The miptree's buffer. */
+   drm_intel_bo *bo;
+
+   int error = 0;
+
+   uint32_t cpp;
+   mem_copy_fn mem_copy = NULL;
+
+   /* This fastpath is restricted to specific renderbuffer types:
+    * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
+    * more types.
+    */
+   if (!brw->has_llc ||
+       !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+       pixels == NULL ||
+       _mesa_is_bufferobj(pack->BufferObj) ||
+       pack->Alignment > 4 ||
+       pack->SkipPixels > 0 ||
+       pack->SkipRows > 0 ||
+       (pack->RowLength != 0 && pack->RowLength != width) ||
+       pack->SwapBytes ||
+       pack->LsbFirst ||
+       pack->Invert)
+      return false;
+
+   /* This renderbuffer can come from a texture.  In this case, we impose
+    * some of the same restrictions we have for textures and adjust for
+    * miplevels.
+    */
+   if (rb->TexImage) {
+      if (rb->TexImage->TexObject->Target != GL_TEXTURE_2D &&
+          rb->TexImage->TexObject->Target != GL_TEXTURE_RECTANGLE)
+         return false;
+
+      int level = rb->TexImage->Level + rb->TexImage->TexObject->MinLevel;
+
+      /* Adjust x and y offset based on miplevel */
+      xoffset += irb->mt->level[level].level_x;
+      yoffset += irb->mt->level[level].level_y;
+   }
+
+   if (!intel_get_memcpy(rb->Format, format, type, &mem_copy, &cpp))
+      return false;
+
+   if (!irb->mt ||
+       (irb->mt->tiling != I915_TILING_X &&
+       irb->mt->tiling != I915_TILING_Y)) {
+      /* The algorithm is written only for X- or Y-tiled memory. */
+      return false;
+   }
+
+   /* Since we are going to read raw data to the miptree, we need to resolve
+    * any pending fast color clears before we start.
+    */
+   intel_miptree_resolve_color(brw, irb->mt);
+
+   bo = irb->mt->bo;
+
+   if (drm_intel_bo_references(brw->batch.bo, bo)) {
+      perf_debug("Flushing before mapping a referenced bo.\n");
+      intel_batchbuffer_flush(brw);
+   }
+
+   error = brw_bo_map(brw, bo, false /* write enable */, "miptree");
+   if (error) {
+      DBG("%s: failed to map bo\n", __FUNCTION__);
+      return false;
+   }
+
+   dst_pitch = _mesa_image_row_stride(pack, width, format, type);
+
+   /* We postponed printing this message until having committed to executing
+    * the function.
+    */
+   DBG("%s: x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x "
+       "mesa_format=0x%x tiling=%d "
+       "pack=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d)\n",
+       __FUNCTION__, xoffset, yoffset, width, height,
+       format, type, rb->Format, irb->mt->tiling,
+       pack->Alignment, pack->RowLength, pack->SkipPixels,
+       pack->SkipRows);
+
+   tiled_to_linear(
+      xoffset * cpp, (xoffset + width) * cpp,
+      yoffset, yoffset + height,
+      pixels - (ptrdiff_t) yoffset * dst_pitch - (ptrdiff_t) xoffset * cpp,
+      bo->virtual,
+      dst_pitch, irb->mt->pitch,
+      brw->has_swizzling,
+      irb->mt->tiling,
+      mem_copy
+   );
+
+   drm_intel_bo_unmap(bo);
+   return true;
+}
+
 void
 intelReadPixels(struct gl_context * ctx,
                 GLint x, GLint y, GLsizei width, GLsizei height,
                 GLenum format, GLenum type,
                 const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
 {
+   bool ok;
+
    struct brw_context *brw = brw_context(ctx);
    bool dirty;
 
@@ -67,6 +202,11 @@ intelReadPixels(struct gl_context * ctx,
       perf_debug("%s: fallback to CPU mapping in PBO case\n", __FUNCTION__);
    }
 
+   ok = intel_readpixels_tiled_memcpy(ctx, x, y, width, height,
+                                      format, type, pixels, pack);
+   if(ok)
+      return;
+
    /* glReadPixels() wont dirty the front buffer, so reset the dirty
     * flag after calling intel_prepare_render(). */
    dirty = brw->front_buffer_dirty;
diff --git a/src/mesa/drivers/dri/i965/intel_tex.h b/src/mesa/drivers/dri/i965/intel_tex.h
index 27f7f1135bd..f048e846d55 100644
--- a/src/mesa/drivers/dri/i965/intel_tex.h
+++ b/src/mesa/drivers/dri/i965/intel_tex.h
@@ -68,4 +68,13 @@ intel_texsubimage_tiled_memcpy(struct gl_context *ctx,
                                const struct gl_pixelstore_attrib *packing,
                                bool for_glTexImage);
 
+bool
+intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
+                                  struct gl_texture_image *texImage,
+                                  GLint xoffset, GLint yofset,
+                                  GLsizei width, GLsizei height,
+                                  GLenum format, GLenum type,
+                                  GLvoid *pixels,
+                                  const struct gl_pixelstore_attrib *packing);
+
 #endif
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
index 88b3d020d6a..53efbbb2f74 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -24,7 +24,7 @@
 #include "intel_blit.h"
 #include "intel_fbo.h"
 #include "intel_image.h"
-
+#include "intel_tiled_memcpy.h"
 #include "brw_context.h"
 
 #define FILE_DEBUG_FLAG DEBUG_TEXTURE
@@ -342,11 +342,125 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target,
                               image->tile_x, image->tile_y);
 }
 
+/**
+ * \brief A fast path for glGetTexImage.
+ *
+ * \see intel_readpixels_tiled_memcpy()
+ */
+bool
+intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
+                                  struct gl_texture_image *texImage,
+                                  GLint xoffset, GLint yoffset,
+                                  GLsizei width, GLsizei height,
+                                  GLenum format, GLenum type,
+                                  GLvoid *pixels,
+                                  const struct gl_pixelstore_attrib *packing)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct intel_texture_image *image = intel_texture_image(texImage);
+   int dst_pitch;
+
+   /* The miptree's buffer. */
+   drm_intel_bo *bo;
+
+   int error = 0;
+
+   uint32_t cpp;
+   mem_copy_fn mem_copy = NULL;
+
+   /* This fastpath is restricted to specific texture types:
+    * a 2D BGRA, RGBA, L8 or A8 texture. It could be generalized to support
+    * more types.
+    *
+    * FINISHME: The restrictions below on packing alignment and packing row
+    * length are likely unneeded now because we calculate the destination stride
+    * with _mesa_image_row_stride. However, before removing the restrictions
+    * we need tests.
+    */
+   if (!brw->has_llc ||
+       !(type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+       !(texImage->TexObject->Target == GL_TEXTURE_2D ||
+         texImage->TexObject->Target == GL_TEXTURE_RECTANGLE) ||
+       pixels == NULL ||
+       _mesa_is_bufferobj(packing->BufferObj) ||
+       packing->Alignment > 4 ||
+       packing->SkipPixels > 0 ||
+       packing->SkipRows > 0 ||
+       (packing->RowLength != 0 && packing->RowLength != width) ||
+       packing->SwapBytes ||
+       packing->LsbFirst ||
+       packing->Invert)
+      return false;
+
+   if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp))
+      return false;
+
+   /* If this is a nontrivial texture view, let another path handle it instead. */
+   if (texImage->TexObject->MinLayer)
+      return false;
+
+   if (!image->mt ||
+       (image->mt->tiling != I915_TILING_X &&
+       image->mt->tiling != I915_TILING_Y)) {
+      /* The algorithm is written only for X- or Y-tiled memory. */
+      return false;
+   }
+
+   /* Since we are going to write raw data to the miptree, we need to resolve
+    * any pending fast color clears before we start.
+    */
+   intel_miptree_resolve_color(brw, image->mt);
+
+   bo = image->mt->bo;
+
+   if (drm_intel_bo_references(brw->batch.bo, bo)) {
+      perf_debug("Flushing before mapping a referenced bo.\n");
+      intel_batchbuffer_flush(brw);
+   }
+
+   error = brw_bo_map(brw, bo, false /* write enable */, "miptree");
+   if (error) {
+      DBG("%s: failed to map bo\n", __FUNCTION__);
+      return false;
+   }
+
+   dst_pitch = _mesa_image_row_stride(packing, width, format, type);
+
+   DBG("%s: level=%d x,y=(%d,%d) (w,h)=(%d,%d) format=0x%x type=0x%x "
+       "mesa_format=0x%x tiling=%d "
+       "packing=(alignment=%d row_length=%d skip_pixels=%d skip_rows=%d)\n",
+       __FUNCTION__, texImage->Level, xoffset, yoffset, width, height,
+       format, type, texImage->TexFormat, image->mt->tiling,
+       packing->Alignment, packing->RowLength, packing->SkipPixels,
+       packing->SkipRows);
+
+   int level = texImage->Level + texImage->TexObject->MinLevel;
+
+   /* Adjust x and y offset based on miplevel */
+   xoffset += image->mt->level[level].level_x;
+   yoffset += image->mt->level[level].level_y;
+
+   tiled_to_linear(
+      xoffset * cpp, (xoffset + width) * cpp,
+      yoffset, yoffset + height,
+      pixels - (ptrdiff_t) yoffset * dst_pitch - (ptrdiff_t) xoffset * cpp,
+      bo->virtual,
+      dst_pitch, image->mt->pitch,
+      brw->has_swizzling,
+      image->mt->tiling,
+      mem_copy
+   );
+
+   drm_intel_bo_unmap(bo);
+   return true;
+}
+
 static void
 intel_get_tex_image(struct gl_context *ctx,
                     GLenum format, GLenum type, GLvoid *pixels,
                     struct gl_texture_image *texImage) {
    struct brw_context *brw = brw_context(ctx);
+   bool ok;
 
    DBG("%s\n", __FUNCTION__);
 
@@ -360,6 +474,13 @@ intel_get_tex_image(struct gl_context *ctx,
       perf_debug("%s: fallback to CPU mapping in PBO case\n", __FUNCTION__);
    }
 
+   ok = intel_gettexsubimage_tiled_memcpy(ctx, texImage, 0, 0,
+                                          texImage->Width, texImage->Height,
+                                          format, type, pixels, &ctx->Pack);
+
+   if(ok)
+      return;
+
    _mesa_meta_GetTexImage(ctx, format, type, pixels, texImage);
 
    DBG("%s - DONE\n", __FUNCTION__);
author	Sisinty Sasmita Patra <[email protected]>	2014-12-12 13:03:21 -0800
committer	Jason Ekstrand <[email protected]>	2015-01-26 17:29:35 -0800
commit	90bd943f2abafd5e3ea29ab8397fd4682ef74170 (patch)
tree	bdc65b26aeb1d1c8e1adc78496c7d89e44a72663
parent	b52959c6022509514deb672e53c710069eed0237 (diff)