diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/radeon_uvd.c | 8 | ||||
-rw-r--r-- | src/gallium/drivers/r600/radeon_vce.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/radeon_video.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/radeon_uvd.c | 10 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/radeon_uvd_enc.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/radeon_vce.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/radeon_vcn_dec.c | 18 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/radeon_vcn_enc.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/radeon_video.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/radeon/radeon_winsys.h | 14 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.c | 3 | ||||
-rw-r--r-- | src/gallium/include/pipe/p_defines.h | 8 | ||||
-rw-r--r-- | src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 96 | ||||
-rw-r--r-- | src/gallium/winsys/amdgpu/drm/amdgpu_bo.h | 3 |
17 files changed, 140 insertions, 62 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index a77f58242e3..9085be4e2f3 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -438,7 +438,9 @@ static void *evergreen_create_compute_state(struct pipe_context *ctx, /* Upload code + ROdata */ shader->code_bo = r600_compute_buffer_alloc_vram(rctx->screen, shader->bc.ndw * 4); - p = r600_buffer_map_sync_with_rings(&rctx->b, shader->code_bo, PIPE_TRANSFER_WRITE); + p = r600_buffer_map_sync_with_rings( + &rctx->b, shader->code_bo, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); //TODO: use util_memcpy_cpu_to_le32 ? memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4); rctx->b.ws->buffer_unmap(shader->code_bo->buf); diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 7029be24f4b..4ba77c535f9 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -2772,7 +2772,9 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, return NULL; } - bytecode = r600_buffer_map_sync_with_rings(&rctx->b, shader->buffer, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED); + bytecode = r600_buffer_map_sync_with_rings + (&rctx->b, shader->buffer, + PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED | RADEON_TRANSFER_TEMPORARY); bytecode += shader->offset / 4; if (R600_BIG_ENDIAN) { diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 408939d1105..fc826470d69 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -141,7 +141,9 @@ static int store_shader(struct pipe_context *ctx, if (shader->bo == NULL) { return -ENOMEM; } - ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE); + ptr = r600_buffer_map_sync_with_rings( + &rctx->b, shader->bo, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); if (R600_BIG_ENDIAN) { for (i = 0; i < shader->shader.bc.ndw; ++i) { ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]); diff --git a/src/gallium/drivers/r600/radeon_uvd.c b/src/gallium/drivers/r600/radeon_uvd.c index 495a93dc55a..5568f2138e4 100644 --- a/src/gallium/drivers/r600/radeon_uvd.c +++ b/src/gallium/drivers/r600/radeon_uvd.c @@ -152,7 +152,8 @@ static void map_msg_fb_it_buf(struct ruvd_decoder *dec) buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; /* and map it for CPU access */ - ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE); + ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); /* calc buffer offsets */ dec->msg = (struct ruvd_msg *)ptr; @@ -1068,7 +1069,7 @@ static void ruvd_begin_frame(struct pipe_video_codec *decoder, dec->bs_size = 0; dec->bs_ptr = dec->ws->buffer_map( dec->bs_buffers[dec->cur_buffer].res->buf, - dec->cs, PIPE_TRANSFER_WRITE); + dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); } /** @@ -1121,7 +1122,8 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder, } dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE); + PIPE_TRANSFER_WRITE | + RADEON_TRANSFER_TEMPORARY); if (!dec->bs_ptr) return; diff --git a/src/gallium/drivers/r600/radeon_vce.c b/src/gallium/drivers/r600/radeon_vce.c index 60ba12a593a..e38b927b1d4 100644 --- a/src/gallium/drivers/r600/radeon_vce.c +++ b/src/gallium/drivers/r600/radeon_vce.c @@ -353,7 +353,9 @@ static void rvce_get_feedback(struct pipe_video_codec *encoder, struct rvid_buffer *fb = feedback; if (size) { - uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE); + uint32_t *ptr = enc->ws->buffer_map( + fb->res->buf, enc->cs, + PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); if (ptr[1]) { *size = ptr[4] - ptr[9]; diff --git a/src/gallium/drivers/r600/radeon_video.c b/src/gallium/drivers/r600/radeon_video.c index 02fcf77d4ff..8e0af448be5 100644 --- a/src/gallium/drivers/r600/radeon_video.c +++ b/src/gallium/drivers/r600/radeon_video.c @@ -97,11 +97,13 @@ bool rvid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs, if (!rvid_create_buffer(screen, new_buf, new_size, new_buf->usage)) goto error; - src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ); + src = ws->buffer_map(old_buf.res->buf, cs, + PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY); if (!src) goto error; - dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE); + dst = ws->buffer_map(new_buf->res->buf, cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); if (!dst) goto error; diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index 62af1a311c2..ca066e89823 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -148,7 +148,8 @@ static void map_msg_fb_it_buf(struct ruvd_decoder *dec) buf = &dec->msg_fb_it_buffers[dec->cur_buffer]; /* and map it for CPU access */ - ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE); + ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); /* calc buffer offsets */ dec->msg = (struct ruvd_msg *)ptr; @@ -1015,7 +1016,7 @@ static void ruvd_begin_frame(struct pipe_video_codec *decoder, dec->bs_size = 0; dec->bs_ptr = dec->ws->buffer_map( dec->bs_buffers[dec->cur_buffer].res->buf, - dec->cs, PIPE_TRANSFER_WRITE); + dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); } /** @@ -1060,8 +1061,9 @@ static void ruvd_decode_bitstream(struct pipe_video_codec *decoder, return; } - dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE); + dec->bs_ptr = dec->ws->buffer_map( + buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); if (!dec->bs_ptr) return; diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc.c b/src/gallium/drivers/radeon/radeon_uvd_enc.c index 4384e5e1646..3164dbb2c20 100644 --- a/src/gallium/drivers/radeon/radeon_uvd_enc.c +++ b/src/gallium/drivers/radeon/radeon_uvd_enc.c @@ -263,9 +263,9 @@ radeon_uvd_enc_get_feedback(struct pipe_video_codec *encoder, if (NULL != size) { radeon_uvd_enc_feedback_t *fb_data = - (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map(fb->res->buf, - enc->cs, - PIPE_TRANSFER_READ_WRITE); + (radeon_uvd_enc_feedback_t *) enc->ws->buffer_map( + fb->res->buf, enc->cs, + PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); if (!fb_data->status) *size = fb_data->bitstream_size; diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c index 310d1654b05..94df06e88c6 100644 --- a/src/gallium/drivers/radeon/radeon_vce.c +++ b/src/gallium/drivers/radeon/radeon_vce.c @@ -352,7 +352,9 @@ static void rvce_get_feedback(struct pipe_video_codec *encoder, struct rvid_buffer *fb = feedback; if (size) { - uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE); + uint32_t *ptr = enc->ws->buffer_map( + fb->res->buf, enc->cs, + PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); if (ptr[1]) { *size = ptr[4] - ptr[9]; diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.c b/src/gallium/drivers/radeon/radeon_vcn_dec.c index 1ee85ae3d3f..e402af21a64 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_dec.c +++ b/src/gallium/drivers/radeon/radeon_vcn_dec.c @@ -941,7 +941,9 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec, si_vid_clear_buffer(dec->base.context, &dec->ctx); /* ctx needs probs table */ - ptr = dec->ws->buffer_map(dec->ctx.res->buf, dec->cs, PIPE_TRANSFER_WRITE); + ptr = dec->ws->buffer_map( + dec->ctx.res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); fill_probs_table(ptr); dec->ws->buffer_unmap(dec->ctx.res->buf); } @@ -1034,7 +1036,8 @@ static void map_msg_fb_it_probs_buf(struct radeon_decoder *dec) buf = &dec->msg_fb_it_probs_buffers[dec->cur_buffer]; /* and map it for CPU access */ - ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE); + ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); /* calc buffer offsets */ dec->msg = ptr; @@ -1312,7 +1315,7 @@ static void radeon_dec_begin_frame(struct pipe_video_codec *decoder, dec->bs_size = 0; dec->bs_ptr = dec->ws->buffer_map( dec->bs_buffers[dec->cur_buffer].res->buf, - dec->cs, PIPE_TRANSFER_WRITE); + dec->cs, PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); } /** @@ -1357,8 +1360,9 @@ static void radeon_dec_decode_bitstream(struct pipe_video_codec *decoder, return; } - dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, - PIPE_TRANSFER_WRITE); + dec->bs_ptr = dec->ws->buffer_map( + buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); if (!dec->bs_ptr) return; @@ -1543,7 +1547,9 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context, void *ptr; buf = &dec->msg_fb_it_probs_buffers[i]; - ptr = dec->ws->buffer_map(buf->res->buf, dec->cs, PIPE_TRANSFER_WRITE); + ptr = dec->ws->buffer_map( + buf->res->buf, dec->cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); ptr += FB_BUFFER_OFFSET + FB_BUFFER_SIZE; fill_probs_table(ptr); dec->ws->buffer_unmap(buf->res->buf); diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c b/src/gallium/drivers/radeon/radeon_vcn_enc.c index e8676f6c721..7d64a28a405 100644 --- a/src/gallium/drivers/radeon/radeon_vcn_enc.c +++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c @@ -244,7 +244,9 @@ static void radeon_enc_get_feedback(struct pipe_video_codec *encoder, struct rvid_buffer *fb = feedback; if (size) { - uint32_t *ptr = enc->ws->buffer_map(fb->res->buf, enc->cs, PIPE_TRANSFER_READ_WRITE); + uint32_t *ptr = enc->ws->buffer_map( + fb->res->buf, enc->cs, + PIPE_TRANSFER_READ_WRITE | RADEON_TRANSFER_TEMPORARY); if (ptr[1]) *size = ptr[6]; else diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c index a39ce4cc73e..bb1173e8005 100644 --- a/src/gallium/drivers/radeon/radeon_video.c +++ b/src/gallium/drivers/radeon/radeon_video.c @@ -88,11 +88,13 @@ bool si_vid_resize_buffer(struct pipe_screen *screen, struct radeon_cmdbuf *cs, if (!si_vid_create_buffer(screen, new_buf, new_size, new_buf->usage)) goto error; - src = ws->buffer_map(old_buf.res->buf, cs, PIPE_TRANSFER_READ); + src = ws->buffer_map(old_buf.res->buf, cs, + PIPE_TRANSFER_READ | RADEON_TRANSFER_TEMPORARY); if (!src) goto error; - dst = ws->buffer_map(new_buf->res->buf, cs, PIPE_TRANSFER_WRITE); + dst = ws->buffer_map(new_buf->res->buf, cs, + PIPE_TRANSFER_WRITE | RADEON_TRANSFER_TEMPORARY); if (!dst) goto error; diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 49f8bb279e5..a56ff75ad24 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -76,6 +76,15 @@ enum radeon_bo_usage { /* bitfield */ RADEON_USAGE_SYNCHRONIZED = 8 }; +enum radeon_transfer_flags { + /* Indicates that the caller will unmap the buffer. + * + * Not unmapping buffers is an important performance optimization for + * OpenGL (avoids kernel overhead for frequently mapped buffers). + */ + RADEON_TRANSFER_TEMPORARY = (PIPE_TRANSFER_DRV_PRV << 0), +}; + #define RADEON_SPARSE_PAGE_SIZE (64 * 1024) enum ring_type { @@ -294,9 +303,12 @@ struct radeon_winsys { * Map the entire data store of a buffer object into the client's address * space. * + * Callers are expected to unmap buffers again if and only if the + * RADEON_TRANSFER_TEMPORARY flag is set in \p usage. + * * \param buf A winsys buffer object to map. * \param cs A command stream to flush if the buffer is referenced by it. - * \param usage A bitmask of the PIPE_TRANSFER_* flags. + * \param usage A bitmask of the PIPE_TRANSFER_* and RADEON_TRANSFER_* flags. * \return The pointer at the beginning of the buffer. */ void *(*buffer_map)(struct pb_buffer *buf, diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 19522cc97b1..d455fb5db6a 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -5293,7 +5293,8 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader) /* Upload. */ ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL, PIPE_TRANSFER_READ_WRITE | - PIPE_TRANSFER_UNSYNCHRONIZED); + PIPE_TRANSFER_UNSYNCHRONIZED | + RADEON_TRANSFER_TEMPORARY); /* Don't use util_memcpy_cpu_to_le32. LLVM binaries are * endian-independent. */ diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index 693f041b1da..e99895d30d8 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -341,7 +341,13 @@ enum pipe_transfer_usage * PIPE_RESOURCE_FLAG_MAP_COHERENT must be set when creating * the resource. */ - PIPE_TRANSFER_COHERENT = (1 << 14) + PIPE_TRANSFER_COHERENT = (1 << 14), + + /** + * This and higher bits are reserved for private use by drivers. Drivers + * should use this as (PIPE_TRANSFER_DRV_PRV << i). + */ + PIPE_TRANSFER_DRV_PRV = (1 << 24) }; /** diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c index 4cbaa8056ad..73336dd3e01 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c @@ -56,6 +56,7 @@ amdgpu_bo_create(struct radeon_winsys *rws, unsigned alignment, enum radeon_bo_domain domain, enum radeon_bo_flag flags); +static void amdgpu_bo_unmap(struct pb_buffer *buf); static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout, enum radeon_bo_usage usage) @@ -173,6 +174,12 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf) assert(bo->bo && "must not be called for slab entries"); + if (!bo->is_user_ptr && bo->cpu_ptr) { + bo->cpu_ptr = NULL; + amdgpu_bo_unmap(&bo->base); + } + assert(bo->is_user_ptr || bo->u.real.map_count == 0); + if (ws->debug_all_bos) { simple_mtx_lock(&ws->global_bo_list_lock); LIST_DEL(&bo->u.real.global_list_item); @@ -195,14 +202,6 @@ void amdgpu_bo_destroy(struct pb_buffer *_buf) else if (bo->initial_domain & RADEON_DOMAIN_GTT) ws->allocated_gtt -= align64(bo->base.size, ws->info.gart_page_size); - if (bo->u.real.map_count >= 1) { - if (bo->initial_domain & RADEON_DOMAIN_VRAM) - ws->mapped_vram -= bo->base.size; - else if (bo->initial_domain & RADEON_DOMAIN_GTT) - ws->mapped_gtt -= bo->base.size; - ws->num_mapped_buffers--; - } - simple_mtx_destroy(&bo->lock); FREE(bo); } @@ -219,6 +218,29 @@ static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf) amdgpu_bo_destroy(_buf); } +static bool amdgpu_bo_do_map(struct amdgpu_winsys_bo *bo, void **cpu) +{ + assert(!bo->sparse && bo->bo && !bo->is_user_ptr); + int r = amdgpu_bo_cpu_map(bo->bo, cpu); + if (r) { + /* Clear the cache and try again. */ + pb_cache_release_all_buffers(&bo->ws->bo_cache); + r = amdgpu_bo_cpu_map(bo->bo, cpu); + if (r) + return false; + } + + if (p_atomic_inc_return(&bo->u.real.map_count) == 1) { + if (bo->initial_domain & RADEON_DOMAIN_VRAM) + bo->ws->mapped_vram += bo->base.size; + else if (bo->initial_domain & RADEON_DOMAIN_GTT) + bo->ws->mapped_gtt += bo->base.size; + bo->ws->num_mapped_buffers++; + } + + return true; +} + static void *amdgpu_bo_map(struct pb_buffer *buf, struct radeon_cmdbuf *rcs, enum pipe_transfer_usage usage) @@ -226,9 +248,6 @@ static void *amdgpu_bo_map(struct pb_buffer *buf, struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf; struct amdgpu_winsys_bo *real; struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs; - int r; - void *cpu = NULL; - uint64_t offset = 0; assert(!bo->sparse); @@ -313,9 +332,9 @@ static void *amdgpu_bo_map(struct pb_buffer *buf, } } - /* If the buffer is created from user memory, return the user pointer. */ - if (bo->user_ptr) - return bo->user_ptr; + /* Buffer synchronization has been checked, now actually map the buffer. */ + void *cpu = NULL; + uint64_t offset = 0; if (bo->bo) { real = bo; @@ -324,22 +343,31 @@ static void *amdgpu_bo_map(struct pb_buffer *buf, offset = bo->va - real->va; } - r = amdgpu_bo_cpu_map(real->bo, &cpu); - if (r) { - /* Clear the cache and try again. */ - pb_cache_release_all_buffers(&real->ws->bo_cache); - r = amdgpu_bo_cpu_map(real->bo, &cpu); - if (r) - return NULL; + if (usage & RADEON_TRANSFER_TEMPORARY) { + if (real->is_user_ptr) { + cpu = real->cpu_ptr; + } else { + if (!amdgpu_bo_do_map(real, &cpu)) + return NULL; + } + } else { + cpu = p_atomic_read(&real->cpu_ptr); + if (!cpu) { + simple_mtx_lock(&real->lock); + /* Must re-check due to the possibility of a race. Re-check need not + * be atomic thanks to the lock. */ + cpu = real->cpu_ptr; + if (!cpu) { + if (!amdgpu_bo_do_map(real, &cpu)) { + simple_mtx_unlock(&real->lock); + return NULL; + } + p_atomic_set(&real->cpu_ptr, cpu); + } + simple_mtx_unlock(&real->lock); + } } - if (p_atomic_inc_return(&real->u.real.map_count) == 1) { - if (real->initial_domain & RADEON_DOMAIN_VRAM) - real->ws->mapped_vram += real->base.size; - else if (real->initial_domain & RADEON_DOMAIN_GTT) - real->ws->mapped_gtt += real->base.size; - real->ws->num_mapped_buffers++; - } return (uint8_t*)cpu + offset; } @@ -350,12 +378,15 @@ static void amdgpu_bo_unmap(struct pb_buffer *buf) assert(!bo->sparse); - if (bo->user_ptr) + if (bo->is_user_ptr) return; real = bo->bo ? bo : bo->u.slab.real; - + assert(real->u.real.map_count != 0 && "too many unmaps"); if (p_atomic_dec_zero(&real->u.real.map_count)) { + assert(!real->cpu_ptr && + "too many unmaps or forgot RADEON_TRANSFER_TEMPORARY flag"); + if (real->initial_domain & RADEON_DOMAIN_VRAM) real->ws->mapped_vram -= real->base.size; else if (real->initial_domain & RADEON_DOMAIN_GTT) @@ -1459,6 +1490,7 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws, goto error_va_map; /* Initialize it. */ + bo->is_user_ptr = true; pipe_reference_init(&bo->base.reference, 1); simple_mtx_init(&bo->lock, mtx_plain); bo->bo = buf_handle; @@ -1466,7 +1498,7 @@ static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws, bo->base.size = size; bo->base.vtbl = &amdgpu_winsys_bo_vtbl; bo->ws = ws; - bo->user_ptr = pointer; + bo->cpu_ptr = pointer; bo->va = va; bo->u.real.va_handle = va_handle; bo->initial_domain = RADEON_DOMAIN_GTT; @@ -1493,7 +1525,7 @@ error: static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf) { - return ((struct amdgpu_winsys_bo*)buf)->user_ptr != NULL; + return ((struct amdgpu_winsys_bo*)buf)->is_user_ptr; } static bool amdgpu_bo_is_suballocated(struct pb_buffer *buf) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h index 58e6eed733d..88f4241327d 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.h @@ -88,10 +88,11 @@ struct amdgpu_winsys_bo { } u; struct amdgpu_winsys *ws; - void *user_ptr; /* from buffer_from_ptr */ + void *cpu_ptr; /* for user_ptr and permanent maps */ amdgpu_bo_handle bo; /* NULL for slab entries and sparse buffers */ bool sparse; + bool is_user_ptr; bool is_local; uint32_t unique_id; uint64_t va; |