diff options
author | Kenneth Graunke <[email protected]> | 2014-08-21 14:41:17 -0700 |
---|---|---|
committer | Kenneth Graunke <[email protected]> | 2014-10-13 06:55:08 -0700 |
commit | 02ca66fbc3e2b272afcb9bae66348d3b1900e2c3 (patch) | |
tree | 96613387f81fee43143b87e1da156c0dccf8b751 /src/mesa | |
parent | 4c766c7959192b455ae752a7759ffc159e0524a6 (diff) |
i965: Use unsynchronized maps for the program cache on LLC platforms.
There's no reason to stall on pwrite - the CPU always appends to the
buffer and never modifies existing contents, and the GPU never writes
it. Further, the CPU always appends new data before submitting a batch
that requires it.
This code predates the unsynchronized mapping feature, so we simply
didn't have the option when it was written.
Ideally, we would do this for non-LLC platforms too, but unsynchronized
mapping support only exists for LLC systems.
Saves a bunch of stall avoidance copies when uploading shaders.
v2: Rebase on changes to previous patch.
Signed-off-by: Kenneth Graunke <[email protected]>
Reviewed-by: Kristian Høgsberg <[email protected]> [v1]
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state_cache.c | 35 |
1 files changed, 28 insertions, 7 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index f4cae8f560b..eb7452ec130 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -172,14 +172,23 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size) drm_intel_bo *new_bo; new_bo = drm_intel_bo_alloc(brw->bufmgr, "program cache", new_size, 64); + if (brw->has_llc) + drm_intel_gem_bo_map_unsynchronized(new_bo); /* Copy any existing data that needs to be saved. */ if (cache->next_offset != 0) { - drm_intel_bo_map(cache->bo, false); - drm_intel_bo_subdata(new_bo, 0, cache->next_offset, cache->bo->virtual); - drm_intel_bo_unmap(cache->bo); + if (brw->has_llc) { + memcpy(new_bo->virtual, cache->bo->virtual, cache->next_offset); + } else { + drm_intel_bo_map(cache->bo, false); + drm_intel_bo_subdata(new_bo, 0, cache->next_offset, + cache->bo->virtual); + drm_intel_bo_unmap(cache->bo); + } } + if (brw->has_llc) + drm_intel_bo_unmap(cache->bo); drm_intel_bo_unreference(cache->bo); cache->bo = new_bo; cache->bo_used_by_gpu = false; @@ -200,6 +209,7 @@ brw_try_upload_using_copy(struct brw_cache *cache, const void *data, const void *aux) { + struct brw_context *brw = cache->brw; int i; struct brw_cache_item *item; @@ -221,9 +231,11 @@ brw_try_upload_using_copy(struct brw_cache *cache, continue; } - drm_intel_bo_map(cache->bo, false); + if (!brw->has_llc) + drm_intel_bo_map(cache->bo, false); ret = memcmp(cache->bo->virtual + item->offset, data, item->size); - drm_intel_bo_unmap(cache->bo); + if (!brw->has_llc) + drm_intel_bo_unmap(cache->bo); if (ret) continue; @@ -256,7 +268,7 @@ brw_upload_item_data(struct brw_cache *cache, /* If we would block on writing to an in-use program BO, just * recreate it. */ - if (cache->bo_used_by_gpu) { + if (!brw->has_llc && cache->bo_used_by_gpu) { perf_debug("Copying busy program cache buffer.\n"); brw_cache_new_bo(cache, cache->bo->size); } @@ -279,6 +291,7 @@ brw_upload_cache(struct brw_cache *cache, uint32_t *out_offset, void *out_aux) { + struct brw_context *brw = cache->brw; struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); GLuint hash; void *tmp; @@ -319,7 +332,11 @@ brw_upload_cache(struct brw_cache *cache, cache->n_items++; /* Copy data to the buffer */ - drm_intel_bo_subdata(cache->bo, item->offset, data_size, data); + if (brw->has_llc) { + memcpy((char *) cache->bo->virtual + item->offset, data, data_size); + } else { + drm_intel_bo_subdata(cache->bo, item->offset, data_size, data); + } *out_offset = item->offset; *(void **)out_aux = (void *)((char *)item->key + item->key_size); @@ -341,6 +358,8 @@ brw_init_caches(struct brw_context *brw) cache->bo = drm_intel_bo_alloc(brw->bufmgr, "program cache", 4096, 64); + if (brw->has_llc) + drm_intel_gem_bo_map_unsynchronized(cache->bo); cache->aux_compare[BRW_VS_PROG] = brw_vs_prog_data_compare; cache->aux_compare[BRW_GS_PROG] = brw_gs_prog_data_compare; @@ -407,6 +426,8 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) DBG("%s\n", __FUNCTION__); + if (brw->has_llc) + drm_intel_bo_unmap(cache->bo); drm_intel_bo_unreference(cache->bo); cache->bo = NULL; brw_clear_cache(brw, cache); |