diff options
author | Marek Olšák <[email protected]> | 2017-11-02 00:00:53 +0100 |
---|---|---|
committer | Marek Olšák <[email protected]> | 2017-11-06 16:23:20 +0100 |
commit | 4b0dc098b2561c07c59f7dab2813640a25789bf1 (patch) | |
tree | dac245ade98bd4e4dc5ba804d81771c6ef91339d /src | |
parent | a5d3999c31e2460f690b561b41170bb7bc24fc65 (diff) |
gallium/u_threaded: don't map big VRAM buffers for the first upload directly
Together with the radeonsi commit, this improves ParaView "many spheres"
performance by 4x.
Reviewed-by: Nicolai Hähnle <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/auxiliary/util/u_threaded_context.c | 14 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/u_threaded_context.h | 6 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/r600_buffer_common.c | 10 |
3 files changed, 28 insertions, 2 deletions
diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c index 0f232580532..ccce12b00ce 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.c +++ b/src/gallium/auxiliary/util/u_threaded_context.c @@ -1284,6 +1284,20 @@ tc_improve_map_buffer_flags(struct threaded_context *tc, if (usage & tc_flags) return usage; + /* Use the staging upload if it's preferred. */ + if (usage & (PIPE_TRANSFER_DISCARD_RANGE | + PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) && + !(usage & PIPE_TRANSFER_PERSISTENT) && + /* Try not to decrement the counter if it's not positive. Still racy, + * but it makes it harder to wrap the counter from INT_MIN to INT_MAX. */ + tres->max_forced_staging_uploads > 0 && + p_atomic_dec_return(&tres->max_forced_staging_uploads) >= 0) { + usage &= ~(PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE | + PIPE_TRANSFER_UNSYNCHRONIZED); + + return usage | tc_flags | PIPE_TRANSFER_DISCARD_RANGE; + } + /* Sparse buffers can't be mapped directly and can't be reallocated * (fully invalidated). That may just be a radeonsi limitation, but * the threaded context must obey it with radeonsi. diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h index 8977b03cd20..ac7bc3dec73 100644 --- a/src/gallium/auxiliary/util/u_threaded_context.h +++ b/src/gallium/auxiliary/util/u_threaded_context.h @@ -241,6 +241,12 @@ struct threaded_resource { * pointers. */ bool is_shared; bool is_user_ptr; + + /* If positive, prefer DISCARD_RANGE with a staging buffer over any other + * method of CPU access when map flags allow it. Useful for buffers that + * are too large for the visible VRAM window. 
+ */ + int max_forced_staging_uploads; }; struct threaded_transfer { diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index 67daaa40053..92521f47792 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -191,10 +191,15 @@ void si_init_resource_fields(struct r600_common_screen *rscreen, res->vram_usage = 0; res->gart_usage = 0; - if (res->domains & RADEON_DOMAIN_VRAM) + if (res->domains & RADEON_DOMAIN_VRAM) { res->vram_usage = size; - else if (res->domains & RADEON_DOMAIN_GTT) + + res->b.max_forced_staging_uploads = + rscreen->info.has_dedicated_vram && + size >= rscreen->info.vram_vis_size / 4 ? 1 : 0; + } else if (res->domains & RADEON_DOMAIN_GTT) { res->gart_usage = size; + } } bool si_alloc_resource(struct r600_common_screen *rscreen, @@ -289,6 +294,7 @@ void si_replace_buffer_storage(struct pipe_context *ctx, pb_reference(&rdst->buf, rsrc->buf); rdst->gpu_address = rsrc->gpu_address; rdst->b.b.bind = rsrc->b.b.bind; + rdst->b.max_forced_staging_uploads = rsrc->b.max_forced_staging_uploads; rdst->flags = rsrc->flags; assert(rdst->vram_usage == rsrc->vram_usage); |