summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/swr/swr_context.h 1
-rw-r--r-- src/gallium/drivers/swr/swr_draw.cpp 9
-rw-r--r-- src/gallium/drivers/swr/swr_screen.cpp 13
-rw-r--r-- src/gallium/drivers/swr/swr_screen.h 2
-rw-r--r-- src/gallium/drivers/swr/swr_state.cpp 37
5 files changed, 51 insertions, 11 deletions
diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h
index 753cbf3115a..8bed78f869a 100644
--- a/src/gallium/drivers/swr/swr_context.h
+++ b/src/gallium/drivers/swr/swr_context.h
@@ -51,6 +51,7 @@
#define SWR_NEW_FRAMEBUFFER (1 << 15)
#define SWR_NEW_CLIP (1 << 16)
#define SWR_NEW_SO (1 << 17)
+#define SWR_LARGE_CLIENT_DRAW (1<<18) // Indicates client draw will block
namespace std
{
diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp
index eae4b572df3..62ad3f716cf 100644
--- a/src/gallium/drivers/swr/swr_draw.cpp
+++ b/src/gallium/drivers/swr/swr_draw.cpp
@@ -188,6 +188,15 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
info->instance_count,
info->start,
info->start_instance);
+
+ /* On a large client-buffer draw, the client buffer was used directly,
+ * without a copy, so block until the draw is finished.
+ * VMD is an example application that benefits from this. */
+ if (ctx->dirty & SWR_LARGE_CLIENT_DRAW) {
+ struct swr_screen *screen = swr_screen(pipe->screen);
+ swr_fence_submit(ctx, screen->flush_fence);
+ swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
+ }
}
diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
index 9352181fae7..c8ff810e165 100644
--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -61,6 +61,9 @@
#define SWR_MAX_TEXTURE_CUBE_LEVELS 14 /* 8K x 8K for now */
#define SWR_MAX_TEXTURE_ARRAY_LAYERS 512 /* 8K x 512 / 8K x 8K x 512 */
+/* Default max client_copy_limit */
+#define SWR_CLIENT_COPY_LIMIT 32768
+
/* Flag indicates creation of alternate surface, to prevent recursive loop
* in resource creation when msaa_force_enable is set. */
#define SWR_RESOURCE_FLAG_ALT_SURFACE (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
@@ -1067,6 +1070,16 @@ swr_destroy_screen(struct pipe_screen *p_screen)
static void
swr_validate_env_options(struct swr_screen *screen)
{
+ /* The client_copy_limit sets a maximum on the amount of user-buffer memory
+ * copied to scratch space on a draw. At or above this limit, the draw will
+ * access the user-buffer directly and then block. This is faster than
+ * queuing many large client draws. */
+ screen->client_copy_limit = SWR_CLIENT_COPY_LIMIT;
+ int client_copy_limit =
+ debug_get_num_option("SWR_CLIENT_COPY_LIMIT", SWR_CLIENT_COPY_LIMIT);
+ if (client_copy_limit > 0)
+ screen->client_copy_limit = client_copy_limit;
+
/* XXX msaa under development, disable by default for now */
screen->msaa_max_count = 0; /* was SWR_MAX_NUM_MULTISAMPLES; */
diff --git a/src/gallium/drivers/swr/swr_screen.h b/src/gallium/drivers/swr/swr_screen.h
index a10f4265354..a11ea9f41de 100644
--- a/src/gallium/drivers/swr/swr_screen.h
+++ b/src/gallium/drivers/swr/swr_screen.h
@@ -43,8 +43,10 @@ struct swr_screen {
struct sw_winsys *winsys;
+ /* Configurable environment settings */
boolean msaa_force_enable;
uint8_t msaa_max_count;
+ uint32_t client_copy_limit;
HANDLE hJitMgr;
diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
index 4eef60681a7..534f3c59b1c 100644
--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -1267,12 +1267,20 @@ swr_update_derived(struct pipe_context *pipe,
partial_inbounds = 0;
min_vertex_index = info.min_index;
- /* Copy only needed vertices to scratch space */
size = AlignUp(size, 4);
- const void *ptr = (const uint8_t *) vb->buffer.user + base;
- ptr = (uint8_t *)swr_copy_to_scratch_space(
- ctx, &ctx->scratch->vertex_buffer, ptr, size);
- p_data = (const uint8_t *)ptr - base;
+ /* If size of client memory copy is too large, don't copy. The
+ * draw will access user-buffer directly and then block. This is
+ * faster than queuing many large client draws. */
+ if (size >= screen->client_copy_limit) {
+ post_update_dirty_flags |= SWR_LARGE_CLIENT_DRAW;
+ p_data = (const uint8_t *) vb->buffer.user;
+ } else {
+ /* Copy only needed vertices to scratch space */
+ const void *ptr = (const uint8_t *) vb->buffer.user + base;
+ ptr = (uint8_t *)swr_copy_to_scratch_space(
+ ctx, &ctx->scratch->vertex_buffer, ptr, size);
+ p_data = (const uint8_t *)ptr - base;
+ }
}
swrVertexBuffers[i] = {0};
@@ -1311,12 +1319,19 @@ swr_update_derived(struct pipe_context *pipe,
size = info.count * pitch;
size = AlignUp(size, 4);
-
- /* Copy indices to scratch space */
- const void *ptr = info.index.user;
- ptr = swr_copy_to_scratch_space(
- ctx, &ctx->scratch->index_buffer, ptr, size);
- p_data = (const uint8_t *)ptr;
+ /* If size of client memory copy is too large, don't copy. The
+ * draw will access user-buffer directly and then block. This is
+ * faster than queuing many large client draws. */
+ if (size >= screen->client_copy_limit) {
+ post_update_dirty_flags |= SWR_LARGE_CLIENT_DRAW;
+ p_data = (const uint8_t *) info.index.user;
+ } else {
+ /* Copy indices to scratch space */
+ const void *ptr = info.index.user;
+ ptr = swr_copy_to_scratch_space(
+ ctx, &ctx->scratch->index_buffer, ptr, size);
+ p_data = (const uint8_t *)ptr;
+ }
}
SWR_INDEX_BUFFER_STATE swrIndexBuffer;