diff options
-rw-r--r-- | src/gallium/drivers/swr/swr_context.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/swr/swr_draw.cpp | 9 | ||||
-rw-r--r-- | src/gallium/drivers/swr/swr_screen.cpp | 13 | ||||
-rw-r--r-- | src/gallium/drivers/swr/swr_screen.h | 2 | ||||
-rw-r--r-- | src/gallium/drivers/swr/swr_state.cpp | 37 |
5 files changed, 51 insertions, 11 deletions
diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h index 753cbf3115a..8bed78f869a 100644 --- a/src/gallium/drivers/swr/swr_context.h +++ b/src/gallium/drivers/swr/swr_context.h @@ -51,6 +51,7 @@ #define SWR_NEW_FRAMEBUFFER (1 << 15) #define SWR_NEW_CLIP (1 << 16) #define SWR_NEW_SO (1 << 17) +#define SWR_LARGE_CLIENT_DRAW (1<<18) // Indicates client draw will block namespace std { diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp index eae4b572df3..62ad3f716cf 100644 --- a/src/gallium/drivers/swr/swr_draw.cpp +++ b/src/gallium/drivers/swr/swr_draw.cpp @@ -188,6 +188,15 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) info->instance_count, info->start, info->start_instance); + + /* On large client-buffer draw, we used client buffer directly, without + * copy. Block until draw is finished. + * VMD is an example application that benefits from this. */ + if (ctx->dirty & SWR_LARGE_CLIENT_DRAW) { + struct swr_screen *screen = swr_screen(pipe->screen); + swr_fence_submit(ctx, screen->flush_fence); + swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0); + } } diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp index 9352181fae7..c8ff810e165 100644 --- a/src/gallium/drivers/swr/swr_screen.cpp +++ b/src/gallium/drivers/swr/swr_screen.cpp @@ -61,6 +61,9 @@ #define SWR_MAX_TEXTURE_CUBE_LEVELS 14 /* 8K x 8K for now */ #define SWR_MAX_TEXTURE_ARRAY_LAYERS 512 /* 8K x 512 / 8K x 8K x 512 */ +/* Default max client_copy_limit */ +#define SWR_CLIENT_COPY_LIMIT 32768 + /* Flag indicates creation of alternate surface, to prevent recursive loop * in resource creation when msaa_force_enable is set. */ #define SWR_RESOURCE_FLAG_ALT_SURFACE (PIPE_RESOURCE_FLAG_DRV_PRIV << 0) @@ -1067,6 +1070,16 @@ swr_destroy_screen(struct pipe_screen *p_screen) static void swr_validate_env_options(struct swr_screen *screen) { + /* The client_copy_limit sets a maximum on the amount of user-buffer memory + * copied to scratch space on a draw. Past this, the draw will access + * user-buffer directly and then block. This is faster than queuing many + * large client draws. */ + screen->client_copy_limit = SWR_CLIENT_COPY_LIMIT; + int client_copy_limit = + debug_get_num_option("SWR_CLIENT_COPY_LIMIT", SWR_CLIENT_COPY_LIMIT); + if (client_copy_limit > 0) + screen->client_copy_limit = client_copy_limit; + /* XXX msaa under development, disable by default for now */ screen->msaa_max_count = 0; /* was SWR_MAX_NUM_MULTISAMPLES; */ diff --git a/src/gallium/drivers/swr/swr_screen.h b/src/gallium/drivers/swr/swr_screen.h index a10f4265354..a11ea9f41de 100644 --- a/src/gallium/drivers/swr/swr_screen.h +++ b/src/gallium/drivers/swr/swr_screen.h @@ -43,8 +43,10 @@ struct swr_screen { struct sw_winsys *winsys; + /* Configurable environment settings */ boolean msaa_force_enable; uint8_t msaa_max_count; + uint32_t client_copy_limit; HANDLE hJitMgr; diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp index 4eef60681a7..534f3c59b1c 100644 --- a/src/gallium/drivers/swr/swr_state.cpp +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -1267,12 +1267,20 @@ swr_update_derived(struct pipe_context *pipe, partial_inbounds = 0; min_vertex_index = info.min_index; - /* Copy only needed vertices to scratch space */ size = AlignUp(size, 4); - const void *ptr = (const uint8_t *) vb->buffer.user + base; - ptr = (uint8_t *)swr_copy_to_scratch_space( - ctx, &ctx->scratch->vertex_buffer, ptr, size); - p_data = (const uint8_t *)ptr - base; + /* If size of client memory copy is too large, don't copy. The + * draw will access user-buffer directly and then block. This is + * faster than queuing many large client draws. */ + if (size >= screen->client_copy_limit) { + post_update_dirty_flags |= SWR_LARGE_CLIENT_DRAW; + p_data = (const uint8_t *) vb->buffer.user; + } else { + /* Copy only needed vertices to scratch space */ + const void *ptr = (const uint8_t *) vb->buffer.user + base; + ptr = (uint8_t *)swr_copy_to_scratch_space( + ctx, &ctx->scratch->vertex_buffer, ptr, size); + p_data = (const uint8_t *)ptr - base; + } } swrVertexBuffers[i] = {0}; @@ -1311,12 +1319,19 @@ swr_update_derived(struct pipe_context *pipe, size = info.count * pitch; size = AlignUp(size, 4); - - /* Copy indices to scratch space */ - const void *ptr = info.index.user; - ptr = swr_copy_to_scratch_space( - ctx, &ctx->scratch->index_buffer, ptr, size); - p_data = (const uint8_t *)ptr; + /* If size of client memory copy is too large, don't copy. The + * draw will access user-buffer directly and then block. This is + * faster than queuing many large client draws. */ + if (size >= screen->client_copy_limit) { + post_update_dirty_flags |= SWR_LARGE_CLIENT_DRAW; + p_data = (const uint8_t *) info.index.user; + } else { + /* Copy indices to scratch space */ + const void *ptr = info.index.user; + ptr = swr_copy_to_scratch_space( + ctx, &ctx->scratch->index_buffer, ptr, size); + p_data = (const uint8_t *)ptr; + } } SWR_INDEX_BUFFER_STATE swrIndexBuffer; |