From 609ad99a1a4b3a59436c520b355f482dff64b34a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 10 Jan 2008 14:43:16 -0800 Subject: [965] Improve performance by allocating CURBE buffers a page at a time. Since each one is only 64 bytes, and kernel allocations are a page anyway, this lets us reduce buffer allocation by packing many CURBEs into one buffer, for each batchbuffer submitted. Improves openarena performance by around 10%. --- src/mesa/drivers/dri/i965/brw_context.h | 4 ++++ src/mesa/drivers/dri/i965/brw_curbe.c | 35 ++++++++++++++++++++++++--------- src/mesa/drivers/dri/i965/brw_vtbl.c | 3 +++ 3 files changed, 33 insertions(+), 9 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 9ddd41d3f39..7a2073d7c16 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -538,6 +538,10 @@ struct brw_context struct brw_tracked_state tracked_state; dri_bo *curbe_bo; + /** Offset within curbe_bo of space for current curbe entry */ + GLuint curbe_offset; + /** Offset within curbe_bo of space for next curbe entry */ + GLuint curbe_next_offset; GLfloat *last_buf; GLuint last_bufsz; diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 2e39ec48055..f41f659b331 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -286,7 +286,8 @@ static void upload_constant_buffer(struct brw_context *brw) brw->curbe.last_buf ? 
memcmp(buf, brw->curbe.last_buf, bufsz) : -1); } - if (brw->curbe.last_buf && + if (brw->curbe.curbe_bo != NULL && + brw->curbe.last_buf && bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { free(buf); @@ -297,16 +298,32 @@ static void upload_constant_buffer(struct brw_context *brw) brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; - dri_bo_unreference(brw->curbe.curbe_bo); - brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE", - bufsz, 1 << 6, - DRM_BO_FLAG_MEM_LOCAL | - DRM_BO_FLAG_CACHED | - DRM_BO_FLAG_CACHED_MAPPED); + if (brw->curbe.curbe_bo != NULL && + brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size) + { + dri_bo_unreference(brw->curbe.curbe_bo); + brw->curbe.curbe_bo = NULL; + } + + if (brw->curbe.curbe_bo == NULL) { + /* Allocate a single page for CURBE entries for this batchbuffer. + * They're generally around 64b. + */ + brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE", + 4096, 1 << 6, + DRM_BO_FLAG_MEM_LOCAL | + DRM_BO_FLAG_CACHED | + DRM_BO_FLAG_CACHED_MAPPED); + brw->curbe.curbe_next_offset = 0; + } + + brw->curbe.curbe_offset = brw->curbe.curbe_next_offset; + brw->curbe.curbe_next_offset += bufsz; + brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64); /* Copy data to the buffer: */ - dri_bo_subdata(brw->curbe.curbe_bo, 0, bufsz, buf); + dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf); } /* Because this provokes an action (ie copy the constants into the @@ -325,7 +342,7 @@ static void upload_constant_buffer(struct brw_context *brw) BEGIN_BATCH(2, IGNORE_CLIPRECTS); OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2)); OUT_RELOC(brw->curbe.curbe_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - (sz - 1)); + (sz - 1) + brw->curbe.curbe_offset); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index e9fed4dae10..126e6558395 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c 
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -94,6 +94,9 @@ static void brw_new_batch( struct intel_context *intel ) /* Check that we didn't just wrap our batchbuffer at a bad time. */ assert(!brw->no_batch_wrap); + dri_bo_unreference(brw->curbe.curbe_bo); + brw->curbe.curbe_bo = NULL; + /* Mark all context state as needing to be re-emitted. * This is probably not as severe as on 915, since almost all of our state * is just in referenced buffers. -- cgit v1.2.3