summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/intel
diff options
context:
space:
mode:
author: Kenneth Graunke <[email protected]> 2013-01-15 22:17:23 -0800
committer: Kenneth Graunke <[email protected]> 2013-02-06 10:00:22 -0800
commit: 0b3bebbaacf42ae07f712b5693f7b00fad3ff35e (patch)
tree: f055c62228c1f4da008f095d498393dd833ae723 /src/mesa/drivers/dri/intel
parent: 29aef6cce8114410ef3e82e46de6938d412a5fde (diff)
i965: Implement CopyTexSubImage2D via BLORP (and use it by default).
The BLT engine has many limitations. Currently, it can only blit X-tiled buffers (since we don't have a kernel API to whack the BLT tiling mode register), which means all depth/stencil operations get punted to meta code, which can be very CPU-intensive.

Even if we used the BLT engine, it can't blit between buffers with different tiling modes, such as an X-tiled non-MSAA ARGB8888 texture and a Y-tiled CMS ARGB8888 renderbuffer. This is a fundamental limitation, and the only way around that is to use BLORP.

Previously, BLORP only handled BlitFramebuffer. This patch adds an additional frontend for doing CopyTexSubImage. It also makes it the default. This is partly to increase testing and avoid hiding bugs, and partly because the BLORP path can already handle more cases. With trivial extensions, it should be able to handle everything the BLT can.

This helps PlaneShift massively, which tries to CopyTexSubImage2D between depth buffers whenever a player casts a spell. Since these are Y-tiled, we hit meta and software ReadPixels paths, eating 99% CPU while delivering ~1 FPS. This is particularly bad in an MMO setting because people cast spells all the time.

It also helps Xonotic in 4X MSAA mode. At default power management settings, I measured a 6.35138% +/- 0.672548% performance boost (n=5). (This data is from v1 of the patch.)

No Piglit regressions on Ivybridge (v3) or Sandybridge (v2).

v2: Create a fake intel_renderbuffer to wrap the destination texture image and then reuse do_blorp_blit rather than reimplementing most of it. Remove unnecessary clipping code and conditional rendering check.

v3: Reuse formats_match() to centralize checks; delete temporary renderbuffers. Reorganize the code.

v4: Actually copy stencil when dealing with separate stencil buffers but packed depth/stencil formats. Tested by a new Piglit test.

NOTE: This is a candidate for the 9.1 branch.
Signed-off-by: Kenneth Graunke <[email protected]>
Reviewed-by: Paul Berry <[email protected]> [v4]
Reviewed-by: Ian Romanick <[email protected]> [v3]
Reviewed-and-tested-by: Carl Worth <[email protected]> [v2]
Tested-by: Martin Steigerwald <[email protected]> [v3]
Diffstat (limited to 'src/mesa/drivers/dri/intel')
-rw-r--r-- src/mesa/drivers/dri/intel/intel_fbo.c 30
-rw-r--r-- src/mesa/drivers/dri/intel/intel_fbo.h 4
-rw-r--r-- src/mesa/drivers/dri/intel/intel_tex_copy.c 32
3 files changed, 58 insertions, 8 deletions
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 481080944dd..37ecbd1701d 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -531,6 +531,36 @@ intel_renderbuffer_update_wrapper(struct intel_context *intel,
return true;
}
+/**
+ * Create a fake intel_renderbuffer that wraps a gl_texture_image. Returns the new wrapper, or NULL if the allocation fails (GL_OUT_OF_MEMORY is recorded on the context) or if intel_renderbuffer_update_wrapper() fails for this image. NOTE(review): the caller presumably owns the result and must release it (the failure path below uses intel_delete_renderbuffer) - confirm against callers.
+ */
+struct intel_renderbuffer *
+intel_create_fake_renderbuffer_wrapper(struct intel_context *intel,
+ struct gl_texture_image *image)
+{
+ struct gl_context *ctx = &intel->ctx; /* GL context embedded in the intel context; used for error reporting and cleanup */
+ struct intel_renderbuffer *irb;
+ struct gl_renderbuffer *rb;
+
+ irb = CALLOC_STRUCT(intel_renderbuffer);
+ if (!irb) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer");
+ return NULL;
+ }
+
+ rb = &irb->Base.Base; /* the gl_renderbuffer nested inside the intel_renderbuffer */
+
+ _mesa_init_renderbuffer(rb, 0); /* second argument 0: presumably the renderbuffer name (unnamed) - confirm */
+ rb->ClassID = INTEL_RB_CLASS; /* tag the base object with the Intel renderbuffer class ID */
+
+ if (!intel_renderbuffer_update_wrapper(intel, irb, image, image->Face)) { /* image->Face is presumably the face/layer to wrap - confirm */
+ intel_delete_renderbuffer(ctx, rb); /* wrapping failed: tear down the half-built wrapper instead of leaking it */
+ return NULL;
+ }
+
+ return irb;
+}
+
void
intel_renderbuffer_set_draw_offset(struct intel_renderbuffer *irb)
{
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
index 9c48e9c1e8f..f135dead3ee 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -140,6 +140,10 @@ intel_create_wrapped_renderbuffer(struct gl_context * ctx,
int width, int height,
gl_format format);
+struct intel_renderbuffer *
+intel_create_fake_renderbuffer_wrapper(struct intel_context *intel,
+ struct gl_texture_image *image);
+
extern void
intel_fbo_init(struct intel_context *intel);
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index c9cbcf480d5..5acdb42c3ac 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -41,6 +41,9 @@
#include "intel_fbo.h"
#include "intel_tex.h"
#include "intel_blit.h"
+#ifndef I915
+#include "brw_context.h"
+#endif
#define FILE_DEBUG_FLAG DEBUG_TEXTURE
@@ -177,15 +180,28 @@ intelCopyTexSubImage(struct gl_context *ctx, GLuint dims,
GLint x, GLint y,
GLsizei width, GLsizei height)
{
- if (dims == 3 || !intel_copy_texsubimage(intel_context(ctx),
- intel_texture_image(texImage),
- xoffset, yoffset,
- intel_renderbuffer(rb), x, y, width, height)) {
- fallback_debug("%s - fallback to swrast\n", __FUNCTION__);
- _mesa_meta_CopyTexSubImage(ctx, dims, texImage,
- xoffset, yoffset, zoffset,
- rb, x, y, width, height);
+ struct intel_context *intel = intel_context(ctx);
+ if (dims != 3) {
+#ifndef I915
+ /* Try BLORP first. It can handle almost everything. */
+ if (brw_blorp_copytexsubimage(intel, rb, texImage, x, y,
+ xoffset, yoffset, width, height))
+ return;
+#endif
+
+ /* Next, try the BLT engine. */
+ if (intel_copy_texsubimage(intel_context(ctx),
+ intel_texture_image(texImage),
+ xoffset, yoffset,
+ intel_renderbuffer(rb), x, y, width, height))
+ return;
}
+
+ /* Finally, fall back to meta. This will likely be slow. */
+ fallback_debug("%s - fallback to swrast\n", __FUNCTION__);
+ _mesa_meta_CopyTexSubImage(ctx, dims, texImage,
+ xoffset, yoffset, zoffset,
+ rb, x, y, width, height);
}