summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2011-10-19 10:31:40 -0700
committerEric Anholt <eric@anholt.net>2011-11-03 23:48:07 -0700
commiteab201bad4d4f250ca9318a228d1c71561daee1a (patch)
treedc6069e4fe6399ed825b80fad39d5a9abb1e65c3
parenta1488eec38d5a76f4e04245a6ce8302444800805 (diff)
i965/gen6: Improve glReadPixels() performance by blitting to a linear temp.
The readpixels microbenchmark in mesa-demos goes from 47Mpix/sec at 1000x1000 to 450Mpix/sec. The 10x10 sizes stay about the same. Reviewed-by: Brian Paul <brianp@vmware.com>
-rw-r--r--src/mesa/drivers/dri/intel/intel_fbo.c87
-rw-r--r--src/mesa/drivers/dri/intel/intel_fbo.h2
2 files changed, 81 insertions, 8 deletions
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 9df51d389a3..123dafe4503 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -42,6 +42,7 @@
#include "intel_context.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
+#include "intel_blit.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_regions.h"
@@ -95,7 +96,7 @@ intel_map_renderbuffer(struct gl_context *ctx,
struct intel_context *intel = intel_context(ctx);
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
GLubyte *map;
- int stride;
+ int stride, flip_stride;
/* We sometimes get called with this by our intel_span.c usage. */
if (!irb->region) {
@@ -145,24 +146,92 @@ intel_map_renderbuffer(struct gl_context *ctx,
x, y, w, h, *out_map, *out_stride);
return;
+ } else if (intel->gen >= 6 &&
+ !(mode & GL_MAP_WRITE_BIT) &&
+ irb->region->tiling == I915_TILING_X) {
+ int dst_stride = ALIGN(w * irb->region->cpp, 4);
+ int src_x, src_y;
+
+ /* On gen6+, we have LLC sharing, which means we can get high-performance
+ * access to linear-mapped buffers. So, blit out a tiled buffer (if
+ * possible, which it isn't really for Y tiling) to a temporary BO and
+ * return a map of that.
+ */
+
+ if (rb->Name) {
+ src_x = x + irb->draw_x;
+ src_y = y + irb->draw_y;
+ } else {
+ src_x = x;
+ src_y = irb->region->height - y - h;
+ }
+
+ irb->map_bo = drm_intel_bo_alloc(intel->bufmgr, "MapRenderbuffer() temp",
+ dst_stride * h, 4096);
+
+ /* We don't do the flip in the blit, because it's always so tricky to get
+ * right.
+ */
+ if (irb->map_bo &&
+ intelEmitCopyBlit(intel,
+ irb->region->cpp,
+ irb->region->pitch, irb->region->bo,
+ 0, irb->region->tiling,
+ dst_stride / irb->region->cpp, irb->map_bo,
+ 0, I915_TILING_NONE,
+ src_x, src_y,
+ 0, 0,
+ w, h,
+ GL_COPY)) {
+ intel_batchbuffer_flush(intel);
+ drm_intel_bo_map(irb->map_bo, false);
+
+ if (rb->Name) {
+ *out_map = irb->map_bo->virtual;
+ *out_stride = dst_stride;
+ } else {
+ *out_map = irb->map_bo->virtual + (h - 1) * dst_stride;
+ *out_stride = -dst_stride;
+ }
+
+ DBG("%s: rb %d (%s) blit mapped: (%d, %d) (%dx%d) -> %p/%d\n",
+ __FUNCTION__, rb->Name, _mesa_get_format_name(rb->Format),
+ src_x, src_y, w, h, *out_map, *out_stride);
+
+ return;
+ } else {
+ drm_intel_bo_unreference(irb->map_bo);
+ irb->map_bo = NULL;
+ }
}
map = intel_region_map(intel, irb->region, mode);
- stride = irb->region->pitch * irb->region->cpp;
if (rb->Name == 0) {
- map += stride * (irb->region->height - 1);
- stride = -stride;
+ y = irb->region->height - 1 - y;
+ flip_stride = -stride;
} else {
- map += irb->draw_x * irb->region->cpp;
- map += (int)irb->draw_y * stride;
+ x += irb->draw_x;
+ y += irb->draw_y;
+ flip_stride = stride;
}
+ if (drm_intel_bo_references(intel->batch.bo, irb->region->bo)) {
+ intel_batchbuffer_flush(intel);
+ }
+
+ drm_intel_gem_bo_map_gtt(irb->region->bo);
+
+ map = irb->region->bo->virtual;
map += x * irb->region->cpp;
map += (int)y * stride;
*out_map = map;
- *out_stride = stride;
+ *out_stride = flip_stride;
+
+ DBG("%s: rb %d (%s) gtt mapped: (%d, %d) (%dx%d) -> %p/%d\n",
+ __FUNCTION__, rb->Name, _mesa_get_format_name(rb->Format),
+ x, y, w, h, *out_map, *out_stride);
}
static void
@@ -202,6 +271,10 @@ intel_unmap_renderbuffer(struct gl_context *ctx,
intel_region_unmap(intel, irb->region);
free(irb->map_buffer);
irb->map_buffer = NULL;
+ } else if (irb->map_bo) {
+ drm_intel_bo_unmap(irb->map_bo);
+ drm_intel_bo_unreference(irb->map_bo);
+ irb->map_bo = 0;
} else {
if (irb->region)
intel_region_unmap(intel, irb->region);
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
index 4bcbecdc9c7..f45afd88514 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -44,6 +44,7 @@ struct intel_renderbuffer
{
struct gl_renderbuffer Base;
struct intel_region *region;
+ drm_intel_bo *map_bo;
void *map_buffer;
GLuint map_x, map_y, map_w, map_h;
GLbitfield map_mode;
@@ -60,7 +61,6 @@ struct intel_renderbuffer
*/
struct gl_renderbuffer *wrapped_depth;
struct gl_renderbuffer *wrapped_stencil;
-
/** \} */
GLuint draw_x, draw_y; /**< Offset of drawing within the region */