Diffstat (limited to 'src/mesa')
-rw-r--r--  src/mesa/drivers/dri/i965/brw_compute.c        | 18
-rw-r--r--  src/mesa/drivers/dri/i965/brw_draw.c           | 18
-rw-r--r--  src/mesa/drivers/dri/i965/brw_state.h          |  1
-rw-r--r--  src/mesa/drivers/dri/i965/genX_blorp_exec.c    |  4
-rw-r--r--  src/mesa/drivers/dri/i965/intel_batchbuffer.c  | 29
5 files changed, 30 insertions(+), 40 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
index 1bad7ac7a0c..7f0278ac92b 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -167,7 +167,6 @@ static void
 brw_dispatch_compute_common(struct gl_context *ctx)
 {
    struct brw_context *brw = brw_context(ctx);
-   int estimated_buffer_space_needed;
    bool fail_next = false;

    if (!_mesa_check_conditional_render(ctx))
@@ -180,20 +179,11 @@ brw_dispatch_compute_common(struct gl_context *ctx)

    brw_predraw_resolve_inputs(brw);

-   const int sampler_state_size = 16; /* 16 bytes */
-   estimated_buffer_space_needed = 512; /* batchbuffer commands */
-   estimated_buffer_space_needed += (BRW_MAX_TEX_UNIT *
-                                     (sampler_state_size +
-                                      sizeof(struct gen5_sampler_default_color)));
-   estimated_buffer_space_needed += 1024; /* push constants */
-   estimated_buffer_space_needed += 512; /* misc. pad */
-
-   /* Flush the batch if it's approaching full, so that we don't wrap while
-    * we've got validated state that needs to be in the same batch as the
-    * primitives.
+   /* Flush the batch if the batch/state buffers are nearly full.  We can
+    * grow them if needed, but this is not free, so we'd like to avoid it.
     */
-   intel_batchbuffer_require_space(brw, estimated_buffer_space_needed,
-                                   RENDER_RING);
+   intel_batchbuffer_require_space(brw, 600, RENDER_RING);
+   brw_require_statebuffer_space(brw, 2500);
    intel_batchbuffer_save_state(brw);

 retry:
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index d1ec2e3f09d..06c6ed72c98 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -669,26 +669,16 @@ brw_try_draw_prims(struct gl_context *ctx,
    brw->ctx.NewDriverState |= BRW_NEW_VERTICES;

    for (i = 0; i < nr_prims; i++) {
-      int estimated_max_prim_size;
-      const int sampler_state_size = 16;
-
-      estimated_max_prim_size = 512; /* batchbuffer commands */
-      estimated_max_prim_size += BRW_MAX_TEX_UNIT *
-         (sampler_state_size + sizeof(struct gen5_sampler_default_color));
-      estimated_max_prim_size += 1024; /* gen6 VS push constants */
-      estimated_max_prim_size += 1024; /* gen6 WM push constants */
-      estimated_max_prim_size += 512; /* misc. pad */
-
       /* Flag BRW_NEW_DRAW_CALL on every draw.  This allows us to have
        * atoms that happen on every draw call.
        */
       brw->ctx.NewDriverState |= BRW_NEW_DRAW_CALL;

-      /* Flush the batch if it's approaching full, so that we don't wrap while
-       * we've got validated state that needs to be in the same batch as the
-       * primitives.
+      /* Flush the batch if the batch/state buffers are nearly full.  We can
+       * grow them if needed, but this is not free, so we'd like to avoid it.
        */
-      intel_batchbuffer_require_space(brw, estimated_max_prim_size, RENDER_RING);
+      intel_batchbuffer_require_space(brw, 1500, RENDER_RING);
+      brw_require_statebuffer_space(brw, 2400);
       intel_batchbuffer_save_state(brw);

       if (brw->num_instances != prims[i].num_instances ||
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index c8b71e72de5..9718739dea9 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -185,6 +185,7 @@ void brw_destroy_caches( struct brw_context *brw );
 void brw_print_program_cache(struct brw_context *brw);

 /* intel_batchbuffer.c */
+void brw_require_statebuffer_space(struct brw_context *brw, int size);
 void *brw_state_batch(struct brw_context *brw,
                       int size, int alignment, uint32_t *out_offset);
 uint32_t brw_state_batch_size(struct brw_context *brw, uint32_t offset);
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 5bff7eaff59..3fe81c7c6a1 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -205,7 +205,6 @@ genX(blorp_exec)(struct blorp_batch *batch,
    assert(batch->blorp->driver_ctx == batch->driver_batch);
    struct brw_context *brw = batch->driver_batch;
    struct gl_context *ctx = &brw->ctx;
-   const uint32_t estimated_max_batch_usage = GEN_GEN >= 8 ? 1920 : 1700;
    bool check_aperture_failed_once = false;

    /* Flush the sampler and render caches.  We definitely need to flush the
@@ -222,7 +221,8 @@ genX(blorp_exec)(struct blorp_batch *batch,
    brw_select_pipeline(brw, BRW_RENDER_PIPELINE);

 retry:
-   intel_batchbuffer_require_space(brw, estimated_max_batch_usage, RENDER_RING);
+   intel_batchbuffer_require_space(brw, 1400, RENDER_RING);
+   brw_require_statebuffer_space(brw, 600);
    intel_batchbuffer_save_state(brw);
    brw->no_batch_wrap = true;

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 5aa34e74293..fddc84fcf9b 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -49,8 +49,8 @@
  * should flush.  Each time we flush the batch, we recreate both buffers
  * at the original target size, so it doesn't grow without bound.
  */
-#define BATCH_SZ (8192*sizeof(uint32_t))
-#define STATE_SZ (8192*sizeof(uint32_t))
+#define BATCH_SZ (20 * 1024)
+#define STATE_SZ (16 * 1024)

 /* The kernel assumes batchbuffers are smaller than 256kB. */
 #define MAX_BATCH_SIZE (256 * 1024)
@@ -369,9 +369,8 @@ intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
       intel_batchbuffer_flush(brw);
    }

-   /* For now, flush as if the batch and state buffers still shared a BO */
    const unsigned batch_used = USED_BATCH(*batch) * 4;
-   if (batch_used + sz >= BATCH_SZ - batch->state_used) {
+   if (batch_used + sz >= BATCH_SZ) {
       if (!brw->no_batch_wrap) {
          intel_batchbuffer_flush(brw);
       } else {
@@ -380,7 +379,7 @@ intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
          grow_buffer(brw, &batch->bo, &batch->map, &batch->batch_cpu_map,
                      batch_used, new_size);
          batch->map_next = (void *) batch->map + batch_used;
-         assert(batch_used + sz < batch->bo->size - batch->state_used);
+         assert(batch_used + sz < batch->bo->size);
       }
    }

@@ -1012,6 +1011,19 @@ brw_state_batch_size(struct brw_context *brw, uint32_t offset)
 }

 /**
+ * Reserve some space in the statebuffer, or flush.
+ *
+ * This is used to estimate when we're near the end of the batch,
+ * so we can flush early.
+ */
+void
+brw_require_statebuffer_space(struct brw_context *brw, int size)
+{
+   if (brw->batch.state_used + size >= STATE_SZ)
+      intel_batchbuffer_flush(brw);
+}
+
+/**
  * Allocates a block of space in the batchbuffer for indirect state.
  */
 void *
@@ -1026,10 +1038,7 @@ brw_state_batch(struct brw_context *brw,

    uint32_t offset = ALIGN(batch->state_used, alignment);

-   /* For now, follow the old flushing behavior. */
-   int batch_space = USED_BATCH(*batch) * 4;
-
-   if (offset + size >= STATE_SZ - batch_space) {
+   if (offset + size >= STATE_SZ) {
       if (!brw->no_batch_wrap) {
          intel_batchbuffer_flush(brw);
          offset = ALIGN(batch->state_used, alignment);
@@ -1039,7 +1048,7 @@ brw_state_batch(struct brw_context *brw,
                                 MAX_STATE_SIZE);
          grow_buffer(brw, &batch->state_bo, &batch->state_map,
                      &batch->state_cpu_map, batch->state_used, new_size);
-         assert(offset + size < batch->state_bo->size - batch_space);
+         assert(offset + size < batch->state_bo->size);
       }
    }
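
As a quick illustration of the scheme this patch moves to, here is a minimal, self-contained sketch. It is not driver code: everything named model_* is hypothetical scaffolding, and only the BATCH_SZ/STATE_SZ targets, the "used + size >= limit" flush checks, and the fixed per-draw estimates from brw_draw.c (1500 bytes of commands, 2400 bytes of state) are taken from the diff above.

#include <stdio.h>

#define BATCH_SZ (20 * 1024)   /* target batchbuffer size, from the patch */
#define STATE_SZ (16 * 1024)   /* target statebuffer size, from the patch */

/* Hypothetical model of the two now-separately-tracked buffers. */
struct model_batch {
   unsigned batch_used;   /* bytes of commands emitted so far */
   unsigned state_used;   /* bytes of indirect state emitted so far */
};

/* A flush submits the work and resets both buffers to empty. */
static void model_flush(struct model_batch *b)
{
   printf("flush: batch %u/%u bytes, state %u/%u bytes\n",
          b->batch_used, (unsigned) BATCH_SZ,
          b->state_used, (unsigned) STATE_SZ);
   b->batch_used = 0;
   b->state_used = 0;
}

/* Like intel_batchbuffer_require_space(): flush early if the command
 * estimate no longer fits, so validated state never spans a wrap. */
static void model_require_batch_space(struct model_batch *b, unsigned sz)
{
   if (b->batch_used + sz >= BATCH_SZ)
      model_flush(b);
}

/* Like the new brw_require_statebuffer_space(): the same check, but
 * against the separate statebuffer limit. */
static void model_require_state_space(struct model_batch *b, unsigned sz)
{
   if (b->state_used + sz >= STATE_SZ)
      model_flush(b);
}

int main(void)
{
   struct model_batch b = { 0, 0 };

   for (int i = 0; i < 32; i++) {
      /* Per-draw reservations, using the estimates from brw_draw.c. */
      model_require_batch_space(&b, 1500);
      model_require_state_space(&b, 2400);

      /* Pretend each draw consumed its full worst-case estimate. */
      b.batch_used += 1500;
      b.state_used += 2400;
   }

   model_flush(&b);
   return 0;
}

With these numbers the statebuffer is the binding constraint (2400 bytes per draw against a 16 kB target), so the model flushes roughly every six draws, while the batchbuffer alone would have lasted thirteen. The real driver also has an escape hatch the model omits: when brw->no_batch_wrap is set, the buffers grow via grow_buffer() instead of flushing, which the patch's new comments note is possible but not free.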