diff options
author | Eric Anholt <[email protected]> | 2013-01-23 17:05:10 -0800 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2014-03-26 13:13:26 -0700 |
commit | 3b579882903c577daa1af286a5e0bf5bc122a34d (patch) | |
tree | 79013294b0c19a6d6ad98f6c4a3307bfce3a9bfa /src/mesa/drivers/dri/i965/intel_upload.c | |
parent | b1909b260f6c3855c8214319c602fc7adea7faf9 (diff) |
i965: Massively simplify the intel_upload implementation.
The implementation kept a page-sized area for uploading data, and
uploaded chunks from that to a 64KB-sized streamed buffer. This wasted
cache footprint (and required extra state tracking) when all we want is
to write our data into the buffer immediately.
Instead, build it around an interface like brw_state_batch() that simply
returns a pointer into BO memory, so callers can write their data there
directly.
Improves OpenArena on HSW by 1.62209% +/- 0.355299% (n=61) and on BYT by
1.7916% +/- 0.415743% (n=31).
v2: Rebase on Mesa master, drop old prototypes. Re-do performance
comparison on a kernel that doesn't punish CPU efficiency
improvements.
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965/intel_upload.c')
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_upload.c | 167 |
1 files changed, 62 insertions, 105 deletions
diff --git a/src/mesa/drivers/dri/i965/intel_upload.c b/src/mesa/drivers/dri/i965/intel_upload.c index ec3109bd441..bb3f615d987 100644 --- a/src/mesa/drivers/dri/i965/intel_upload.c +++ b/src/mesa/drivers/dri/i965/intel_upload.c @@ -57,127 +57,84 @@ intel_upload_finish(struct brw_context *brw) if (!brw->upload.bo) return; - if (brw->upload.buffer_len) { - drm_intel_bo_subdata(brw->upload.bo, - brw->upload.buffer_offset, - brw->upload.buffer_len, - brw->upload.buffer); - brw->upload.buffer_len = 0; - } - + drm_intel_bo_unmap(brw->upload.bo); drm_intel_bo_unreference(brw->upload.bo); brw->upload.bo = NULL; + brw->upload.next_offset = 0; } -static void -wrap_buffers(struct brw_context *brw, GLuint size) -{ - intel_upload_finish(brw); - - if (size < INTEL_UPLOAD_SIZE) - size = INTEL_UPLOAD_SIZE; - - brw->upload.bo = drm_intel_bo_alloc(brw->bufmgr, "upload", size, 0); - brw->upload.offset = 0; -} - -void -intel_upload_data(struct brw_context *brw, - const void *ptr, GLuint size, GLuint align, - drm_intel_bo **return_bo, - GLuint *return_offset) -{ - GLuint base, delta; - - base = ALIGN_NPOT(brw->upload.offset, align); - if (brw->upload.bo == NULL || base + size > brw->upload.bo->size) { - wrap_buffers(brw, size); - base = 0; - } - - drm_intel_bo_reference(brw->upload.bo); - *return_bo = brw->upload.bo; - *return_offset = base; - - delta = base - brw->upload.offset; - if (brw->upload.buffer_len && - brw->upload.buffer_len + delta + size > sizeof(brw->upload.buffer)) { - drm_intel_bo_subdata(brw->upload.bo, - brw->upload.buffer_offset, - brw->upload.buffer_len, - brw->upload.buffer); - brw->upload.buffer_len = 0; - } - - if (size < sizeof(brw->upload.buffer)) { - if (brw->upload.buffer_len == 0) - brw->upload.buffer_offset = base; - else - brw->upload.buffer_len += delta; - - memcpy(brw->upload.buffer + brw->upload.buffer_len, ptr, size); - brw->upload.buffer_len += size; - } else { - drm_intel_bo_subdata(brw->upload.bo, base, size, ptr); - } - - brw->upload.offset = base 
+ size; -} - +/** + * Interface for getting memory for uploading streamed data to the GPU + * + * In most cases, streamed data (for GPU state structures, for example) is + * uploaded through brw_state_batch(), since that interface allows relocations + * from the streamed space returned to other BOs. However, that interface has + * the restriction that the amount of space allocated has to be "small" (see + * estimated_max_prim_size in brw_draw.c). + * + * This interface, on the other hand, is able to handle arbitrary sized + * allocation requests, though it will batch small allocations into the same + * BO for efficiency and reduced memory footprint. + * + * \note The returned pointer is valid only until intel_upload_finish(), which + * will happen at batch flush or the next + * intel_upload_space()/intel_upload_data(). + * + * \param out_bo Pointer to a BO, which must point to a valid BO or NULL on + * entry, and will have a reference to the new BO containing the state on + * return. + * + * \param out_offset Offset within the buffer object that the data will land. 
+ */ void * -intel_upload_map(struct brw_context *brw, GLuint size, GLuint align) +intel_upload_space(struct brw_context *brw, + uint32_t size, + uint32_t alignment, + drm_intel_bo **out_bo, + uint32_t *out_offset) { - GLuint base, delta; - char *ptr; + uint32_t offset; - base = ALIGN_NPOT(brw->upload.offset, align); - if (brw->upload.bo == NULL || base + size > brw->upload.bo->size) { - wrap_buffers(brw, size); - base = 0; + offset = ALIGN_NPOT(brw->upload.next_offset, alignment); + if (brw->upload.bo && offset + size > brw->upload.bo->size) { + intel_upload_finish(brw); + offset = 0; } - delta = base - brw->upload.offset; - if (brw->upload.buffer_len && - brw->upload.buffer_len + delta + size > sizeof(brw->upload.buffer)) { - drm_intel_bo_subdata(brw->upload.bo, - brw->upload.buffer_offset, - brw->upload.buffer_len, - brw->upload.buffer); - brw->upload.buffer_len = 0; + if (!brw->upload.bo) { + brw->upload.bo = drm_intel_bo_alloc(brw->bufmgr, "streamed data", + MAX2(INTEL_UPLOAD_SIZE, size), 4096); + if (brw->has_llc) + drm_intel_bo_map(brw->upload.bo, true); + else + drm_intel_gem_bo_map_gtt(brw->upload.bo); } - if (size <= sizeof(brw->upload.buffer)) { - if (brw->upload.buffer_len == 0) - brw->upload.buffer_offset = base; - else - brw->upload.buffer_len += delta; + brw->upload.next_offset = offset + size; - ptr = brw->upload.buffer + brw->upload.buffer_len; - brw->upload.buffer_len += size; - } else { - ptr = malloc(size); + *out_offset = offset; + if (*out_bo != brw->upload.bo) { + drm_intel_bo_unreference(*out_bo); + *out_bo = brw->upload.bo; + drm_intel_bo_reference(brw->upload.bo); } - return ptr; + return brw->upload.bo->virtual + offset; } +/** + * Handy interface to upload some data to temporary GPU memory quickly. + * + * References to this memory should not be retained across batch flushes. 
+ */ void -intel_upload_unmap(struct brw_context *brw, - const void *ptr, GLuint size, GLuint align, - drm_intel_bo **return_bo, - GLuint *return_offset) +intel_upload_data(struct brw_context *brw, + const void *data, + uint32_t size, + uint32_t alignment, + drm_intel_bo **out_bo, + uint32_t *out_offset) { - GLuint base; - - base = ALIGN_NPOT(brw->upload.offset, align); - if (size > sizeof(brw->upload.buffer)) { - drm_intel_bo_subdata(brw->upload.bo, base, size, ptr); - free((void*)ptr); - } - - drm_intel_bo_reference(brw->upload.bo); - *return_bo = brw->upload.bo; - *return_offset = base; - - brw->upload.offset = base + size; + void *dst = intel_upload_space(brw, size, alignment, out_bo, out_offset); + memcpy(dst, data, size); } |