diff options
Diffstat (limited to 'src/gallium/winsys/radeon')
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 2 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 160 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 8 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 87 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_drm_winsys.h | 17 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_winsys.h | 20 |
6 files changed, 215 insertions, 79 deletions
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 897e9627dbc..6daafc3caed 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -453,7 +453,7 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle *buf, } else { /* Try to avoid busy-waiting in radeon_bo_wait. */ if (p_atomic_read(&bo->num_active_ioctls)) - radeon_drm_cs_sync_flush(cs); + radeon_drm_cs_sync_flush(rcs); } radeon_bo_wait((struct pb_buffer*)bo, RADEON_USAGE_READWRITE); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index c5e7f1e44c2..cab27040bba 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -90,6 +90,10 @@ #define RADEON_CS_RING_COMPUTE 1 #endif +#ifndef RADEON_CS_RING_DMA +#define RADEON_CS_RING_DMA 2 +#endif + #ifndef RADEON_CS_END_OF_FRAME #define RADEON_CS_END_OF_FRAME 0x04 #endif @@ -158,10 +162,8 @@ static void radeon_destroy_cs_context(struct radeon_cs_context *csc) FREE(csc->relocs); } -DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE) -static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param); -static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws) +static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws, enum ring_type ring_type) { struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); struct radeon_drm_cs *cs; @@ -170,7 +172,6 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws) if (!cs) { return NULL; } - pipe_semaphore_init(&cs->flush_queued, 0); pipe_semaphore_init(&cs->flush_completed, 0); cs->ws = ws; @@ -189,10 +190,9 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws) cs->csc = &cs->csc1; cs->cst = &cs->csc2; cs->base.buf = cs->csc->buf; + cs->base.ring_type = ring_type; p_atomic_inc(&ws->num_cs); - if (cs->ws->num_cpus > 1 && debug_get_option_thread()) - cs->thread = pipe_thread_create(radeon_drm_cs_emit_ioctl, cs); return &cs->base; } @@ -246,35 +246,49 @@ int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo) return -1; } -static unsigned radeon_add_reloc(struct radeon_cs_context *csc, +static unsigned radeon_add_reloc(struct radeon_drm_cs *cs, struct radeon_bo *bo, enum radeon_bo_usage usage, enum radeon_bo_domain domains, enum radeon_bo_domain *added_domains) { + struct radeon_cs_context *csc = cs->csc; struct drm_radeon_cs_reloc *reloc; - unsigned i; unsigned hash = bo->handle & (sizeof(csc->is_handle_added)-1); enum radeon_bo_domain rd = usage & RADEON_USAGE_READ ? domains : 0; enum radeon_bo_domain wd = usage & RADEON_USAGE_WRITE ? domains : 0; + bool update_hash = TRUE; + int i; + *added_domains = 0; if (csc->is_handle_added[hash]) { i = csc->reloc_indices_hashlist[hash]; reloc = &csc->relocs[i]; - if (reloc->handle == bo->handle) { - update_reloc_domains(reloc, rd, wd, added_domains); - return i; + if (reloc->handle != bo->handle) { + /* Hash collision, look for the BO in the list of relocs linearly. */ + for (i = csc->crelocs - 1; i >= 0; i--) { + reloc = &csc->relocs[i]; + if (reloc->handle == bo->handle) { + /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/ + break; + } + } } - /* Hash collision, look for the BO in the list of relocs linearly. */ - for (i = csc->crelocs; i != 0;) { - --i; - reloc = &csc->relocs[i]; - if (reloc->handle == bo->handle) { - update_reloc_domains(reloc, rd, wd, added_domains); - + if (i >= 0) { + /* On DMA ring we need to emit as many relocation as there is use of the bo + * thus each time this function is call we should grow add again the bo to + * the relocation buffer + * + * Do not update the hash table if it's dma ring, so that first hash always point + * to first bo relocation which will the one used by the kernel. Following relocation + * will be ignore by the kernel memory placement (but still use by the kernel to + * update the cmd stream with proper buffer offset). + */ + update_hash = FALSE; + update_reloc_domains(reloc, rd, wd, added_domains); + if (cs->base.ring_type != RING_DMA) { csc->reloc_indices_hashlist[hash] = i; - /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/ return i; } } @@ -305,7 +319,9 @@ static unsigned radeon_add_reloc(struct radeon_cs_context *csc, reloc->flags = 0; csc->is_handle_added[hash] = TRUE; - csc->reloc_indices_hashlist[hash] = csc->crelocs; + if (update_hash) { + csc->reloc_indices_hashlist[hash] = csc->crelocs; + } csc->chunks[1].length_dw += RELOC_DWORDS; @@ -321,8 +337,7 @@ static unsigned radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs, struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_bo *bo = (struct radeon_bo*)buf; enum radeon_bo_domain added_domains; - - unsigned index = radeon_add_reloc(cs->csc, bo, usage, domains, &added_domains); + unsigned index = radeon_add_reloc(cs, bo, usage, domains, &added_domains); if (added_domains & RADEON_DOMAIN_GTT) cs->csc->used_gart += bo->base.size; @@ -373,7 +388,6 @@ static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs, { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_bo *bo = (struct radeon_bo*)buf; - unsigned index = radeon_get_reloc(cs->csc, bo); if (index == -1) { @@ -385,7 +399,7 @@ static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs, OUT_CS(&cs->base, index * RELOC_DWORDS); } -static void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_cs_context *csc) +void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_cs_context *csc) { unsigned i; @@ -410,25 +424,15 @@ static void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_cs_context *csc) radeon_cs_context_cleanup(csc); } -static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param) +/* + * Make sure previous submission of this cs are completed + */ +void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs) { - struct radeon_drm_cs *cs = (struct radeon_drm_cs*)param; - - while (1) { - pipe_semaphore_wait(&cs->flush_queued); - if (cs->kill_thread) - break; - radeon_drm_cs_emit_ioctl_oneshot(cs->cst); - pipe_semaphore_signal(&cs->flush_completed); - } - pipe_semaphore_signal(&cs->flush_completed); - return NULL; -} + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); -void radeon_drm_cs_sync_flush(struct radeon_drm_cs *cs) -{ /* Wait for any pending ioctl to complete. */ - if (cs->thread && cs->flush_started) { + if (cs->ws->thread && cs->flush_started) { pipe_semaphore_wait(&cs->flush_completed); cs->flush_started = 0; } @@ -445,7 +449,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) fprintf(stderr, "radeon: command stream overflowed\n"); } - radeon_drm_cs_sync_flush(cs); + radeon_drm_cs_sync_flush(rcs); /* Flip command streams. */ tmp = cs->csc; @@ -453,8 +457,7 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) cs->cst = tmp; /* If the CS is not empty or overflowed, emit it in a separate thread. */ - if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && - !debug_get_option_noop()) { + if (cs->base.cdw && cs->base.cdw <= RADEON_MAX_CMDBUF_DWORDS && !debug_get_option_noop()) { unsigned i, crelocs = cs->cst->crelocs; cs->cst->chunks[0].length_dw = cs->base.cdw; @@ -464,31 +467,50 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls); } - cs->cst->flags[0] = 0; - cs->cst->flags[1] = RADEON_CS_RING_GFX; - cs->cst->cs.num_chunks = 2; - if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) { - cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS; - cs->cst->cs.num_chunks = 3; - } - if (cs->ws->info.r600_virtual_address) { - cs->cst->flags[0] |= RADEON_CS_USE_VM; - cs->cst->cs.num_chunks = 3; - } - if (flags & RADEON_FLUSH_END_OF_FRAME) { - cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME; - cs->cst->cs.num_chunks = 3; - } - if (flags & RADEON_FLUSH_COMPUTE) { - cs->cst->flags[1] = RADEON_CS_RING_COMPUTE; + switch (cs->base.ring_type) { + case RING_DMA: + cs->cst->flags[0] = 0; + cs->cst->flags[1] = RADEON_CS_RING_DMA; cs->cst->cs.num_chunks = 3; + if (cs->ws->info.r600_virtual_address) { + cs->cst->flags[0] |= RADEON_CS_USE_VM; + } + break; + default: + case RING_GFX: + cs->cst->flags[0] = 0; + cs->cst->flags[1] = RADEON_CS_RING_GFX; + cs->cst->cs.num_chunks = 2; + if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) { + cs->cst->flags[0] |= RADEON_CS_KEEP_TILING_FLAGS; + cs->cst->cs.num_chunks = 3; + } + if (cs->ws->info.r600_virtual_address) { + cs->cst->flags[0] |= RADEON_CS_USE_VM; + cs->cst->cs.num_chunks = 3; + } + if (flags & RADEON_FLUSH_END_OF_FRAME) { + cs->cst->flags[0] |= RADEON_CS_END_OF_FRAME; + cs->cst->cs.num_chunks = 3; + } + if (flags & RADEON_FLUSH_COMPUTE) { + cs->cst->flags[1] = RADEON_CS_RING_COMPUTE; + cs->cst->cs.num_chunks = 3; + } + break; } - if (cs->thread && - (flags & RADEON_FLUSH_ASYNC)) { + if (cs->ws->thread && (flags & RADEON_FLUSH_ASYNC)) { cs->flush_started = 1; - pipe_semaphore_signal(&cs->flush_queued); + radeon_drm_ws_queue_cs(cs->ws, cs); } else { + pipe_mutex_lock(cs->ws->cs_stack_lock); + if (cs->ws->thread) { + while (p_atomic_read(&cs->ws->ncs)) { + pipe_condvar_wait(cs->ws->cs_queue_empty, cs->ws->cs_stack_lock); + } + } + pipe_mutex_unlock(cs->ws->cs_stack_lock); radeon_drm_cs_emit_ioctl_oneshot(cs->cst); } } else { @@ -503,14 +525,8 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) static void radeon_drm_cs_destroy(struct radeon_winsys_cs *rcs) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - radeon_drm_cs_sync_flush(cs); - if (cs->thread) { - cs->kill_thread = 1; - pipe_semaphore_signal(&cs->flush_queued); - pipe_semaphore_wait(&cs->flush_completed); - pipe_thread_wait(cs->thread); - } - pipe_semaphore_destroy(&cs->flush_queued); + + radeon_drm_cs_sync_flush(rcs); pipe_semaphore_destroy(&cs->flush_completed); radeon_cs_context_cleanup(&cs->csc1); radeon_cs_context_cleanup(&cs->csc2); @@ -525,6 +541,7 @@ static void radeon_drm_cs_set_flush(struct radeon_winsys_cs *rcs, void *user) { struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + cs->flush_cs = flush; cs->flush_data = user; } @@ -562,4 +579,5 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) ws->base.cs_flush = radeon_drm_cs_flush; ws->base.cs_set_flush_callback = radeon_drm_cs_set_flush; ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced; + ws->base.cs_sync_flush = radeon_drm_cs_sync_flush; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index a88fba5b40e..570842dc51c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -74,9 +74,8 @@ struct radeon_drm_cs { void (*flush_cs)(void *ctx, unsigned flags); void *flush_data; - pipe_thread thread; - int flush_started, kill_thread; - pipe_semaphore flush_queued, flush_completed; + int flush_started; + pipe_semaphore flush_completed; }; int radeon_get_reloc(struct radeon_cs_context *csc, struct radeon_bo *bo); @@ -118,7 +117,8 @@ radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo) return bo->num_cs_references != 0; } -void radeon_drm_cs_sync_flush(struct radeon_drm_cs *cs); +void radeon_drm_cs_sync_flush(struct radeon_winsys_cs *rcs); void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws); +void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_cs_context *csc); #endif diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index b8a876c1662..d23220df8a8 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -316,6 +316,12 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) break; } + /* Check for dma */ + ws->info.r600_has_dma = FALSE; + if (ws->info.chip_class >= R700 && ws->info.drm_minor >= 27) { + ws->info.r600_has_dma = TRUE; + } + /* Get GEM info. */ retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO, &gem_info, sizeof(gem_info)); @@ -389,12 +395,21 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws) { struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; + if (ws->thread) { + ws->kill_thread = 1; + pipe_semaphore_signal(&ws->cs_queued); + pipe_thread_wait(ws->thread); + } + pipe_semaphore_destroy(&ws->cs_queued); + pipe_condvar_destroy(ws->cs_queue_empty); + if (!pipe_reference(&ws->base.reference, NULL)) { return; } pipe_mutex_destroy(ws->hyperz_owner_mutex); pipe_mutex_destroy(ws->cmask_owner_mutex); + pipe_mutex_destroy(ws->cs_stack_lock); ws->cman->destroy(ws->cman); ws->kman->destroy(ws->kman); @@ -477,6 +492,71 @@ static int compare_fd(void *key1, void *key2) return pointer_to_intptr(key1) != pointer_to_intptr(key2); } +void radeon_drm_ws_queue_cs(struct radeon_drm_winsys *ws, struct radeon_drm_cs *cs) +{ +retry: + pipe_mutex_lock(ws->cs_stack_lock); + if (p_atomic_read(&ws->ncs) >= RING_LAST) { + /* no room left for a flush */ + pipe_mutex_unlock(ws->cs_stack_lock); + goto retry; + } + ws->cs_stack[p_atomic_read(&ws->ncs)] = cs; + p_atomic_inc(&ws->ncs); + pipe_mutex_unlock(ws->cs_stack_lock); + pipe_semaphore_signal(&ws->cs_queued); +} + +static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param) +{ + struct radeon_drm_winsys *ws = (struct radeon_drm_winsys *)param; + struct radeon_drm_cs *cs; + unsigned i, empty_stack; + + while (1) { + pipe_semaphore_wait(&ws->cs_queued); + if (ws->kill_thread) + break; +next: + pipe_mutex_lock(ws->cs_stack_lock); + cs = ws->cs_stack[0]; + pipe_mutex_unlock(ws->cs_stack_lock); + + if (cs) { + radeon_drm_cs_emit_ioctl_oneshot(cs->cst); + + pipe_mutex_lock(ws->cs_stack_lock); + for (i = 1; i < p_atomic_read(&ws->ncs); i++) { + ws->cs_stack[i - 1] = ws->cs_stack[i]; + } + ws->cs_stack[p_atomic_read(&ws->ncs) - 1] = NULL; + empty_stack = p_atomic_dec_zero(&ws->ncs); + if (empty_stack) { + pipe_condvar_signal(ws->cs_queue_empty); + } + pipe_mutex_unlock(ws->cs_stack_lock); + + pipe_semaphore_signal(&cs->flush_completed); + + if (!empty_stack) { + goto next; + } + } + } + pipe_mutex_lock(ws->cs_stack_lock); + for (i = 0; i < p_atomic_read(&ws->ncs); i++) { + pipe_semaphore_signal(&ws->cs_stack[i]->flush_completed); + ws->cs_stack[i] = NULL; + } + p_atomic_set(&ws->ncs, 0); + pipe_condvar_signal(ws->cs_queue_empty); + pipe_mutex_unlock(ws->cs_stack_lock); + return NULL; +} + +DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", TRUE) +static PIPE_THREAD_ROUTINE(radeon_drm_cs_emit_ioctl, param); + struct radeon_winsys *radeon_drm_winsys_create(int fd) { struct radeon_drm_winsys *ws; @@ -531,6 +611,13 @@ struct radeon_winsys *radeon_drm_winsys_create(int fd) pipe_mutex_init(ws->hyperz_owner_mutex); pipe_mutex_init(ws->cmask_owner_mutex); + pipe_mutex_init(ws->cs_stack_lock); + + p_atomic_set(&ws->ncs, 0); + pipe_semaphore_init(&ws->cs_queued, 0); + pipe_condvar_init(ws->cs_queue_empty); + if (ws->num_cpus > 1 && debug_get_option_thread()) + ws->thread = pipe_thread_create(radeon_drm_cs_emit_ioctl, ws); return &ws->base; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h index e714127730f..74eb408151b 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h @@ -33,6 +33,8 @@ #include "radeon_winsys.h" #include "os/os_thread.h" +struct radeon_drm_cs; + enum radeon_generation { DRV_R300, DRV_R600, @@ -58,6 +60,19 @@ struct radeon_drm_winsys { pipe_mutex hyperz_owner_mutex; struct radeon_drm_cs *cmask_owner; pipe_mutex cmask_owner_mutex; + + /* rings submission thread */ + pipe_mutex cs_stack_lock; + pipe_semaphore cs_queued; + /* we cannot use semaphore for empty queue because maintaining an even + * number of call to semaphore_wait and semaphore_signal is, to say the + * least, tricky + */ + pipe_condvar cs_queue_empty; + pipe_thread thread; + int kill_thread; + int ncs; + struct radeon_drm_cs *cs_stack[RING_LAST]; }; static INLINE struct radeon_drm_winsys * @@ -66,4 +81,6 @@ radeon_drm_winsys(struct radeon_winsys *base) return (struct radeon_drm_winsys*)base; } +void radeon_drm_ws_queue_cs(struct radeon_drm_winsys *ws, struct radeon_drm_cs *cs); + #endif diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index d0c48224cc5..7fdef3fad87 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -138,12 +138,19 @@ enum chip_class { TAHITI, }; +enum ring_type { + RING_GFX = 0, + RING_DMA, + RING_LAST, +}; + struct winsys_handle; struct radeon_winsys_cs_handle; struct radeon_winsys_cs { - unsigned cdw; /* Number of used dwords. */ - uint32_t *buf; /* The command buffer. */ + unsigned cdw; /* Number of used dwords. */ + uint32_t *buf; /* The command buffer. */ + enum ring_type ring_type; }; struct radeon_info { @@ -170,6 +177,7 @@ struct radeon_info { uint32_t r600_max_pipes; boolean r600_backend_map_valid; boolean r600_virtual_address; + boolean r600_has_dma; }; enum radeon_feature_id { @@ -350,7 +358,7 @@ struct radeon_winsys { * * \param ws The winsys this function is called from. */ - struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws); + struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws, enum ring_type ring_type); /** * Destroy a command stream. @@ -433,6 +441,12 @@ struct radeon_winsys { boolean (*cs_request_feature)(struct radeon_winsys_cs *cs, enum radeon_feature_id fid, boolean enable); + /** + * Make sure all asynchronous flush of the cs have completed + * + * \param cs A command stream. + */ + void (*cs_sync_flush)(struct radeon_winsys_cs *cs); /** * Initialize surface |