Diffstat (limited to 'src/mesa')
-rw-r--r--  src/mesa/drivers/dri/i965/brw_compute.c        | 18
-rw-r--r--  src/mesa/drivers/dri/i965/brw_draw.c           | 18
-rw-r--r--  src/mesa/drivers/dri/i965/brw_state.h          |  1
-rw-r--r--  src/mesa/drivers/dri/i965/genX_blorp_exec.c    |  4
-rw-r--r--  src/mesa/drivers/dri/i965/intel_batchbuffer.c  | 29
5 files changed, 30 insertions(+), 40 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_compute.c b/src/mesa/drivers/dri/i965/brw_compute.c
index 1bad7ac7a0c..7f0278ac92b 100644
--- a/src/mesa/drivers/dri/i965/brw_compute.c
+++ b/src/mesa/drivers/dri/i965/brw_compute.c
@@ -167,7 +167,6 @@ static void
 brw_dispatch_compute_common(struct gl_context *ctx)
 {
    struct brw_context *brw = brw_context(ctx);
-   int estimated_buffer_space_needed;
    bool fail_next = false;

    if (!_mesa_check_conditional_render(ctx))
@@ -180,20 +179,11 @@ brw_dispatch_compute_common(struct gl_context *ctx)

    brw_predraw_resolve_inputs(brw);

-   const int sampler_state_size = 16; /* 16 bytes */
-   estimated_buffer_space_needed = 512; /* batchbuffer commands */
-   estimated_buffer_space_needed += (BRW_MAX_TEX_UNIT *
-                                     (sampler_state_size +
-                                      sizeof(struct gen5_sampler_default_color)));
-   estimated_buffer_space_needed += 1024; /* push constants */
-   estimated_buffer_space_needed += 512; /* misc. pad */
-
-   /* Flush the batch if it's approaching full, so that we don't wrap while
-    * we've got validated state that needs to be in the same batch as the
-    * primitives.
+   /* Flush the batch if the batch/state buffers are nearly full.  We can
+    * grow them if needed, but this is not free, so we'd like to avoid it.
     */
-   intel_batchbuffer_require_space(brw, estimated_buffer_space_needed,
-                                   RENDER_RING);
+   intel_batchbuffer_require_space(brw, 600, RENDER_RING);
+   brw_require_statebuffer_space(brw, 2500);
    intel_batchbuffer_save_state(brw);

 retry:
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index d1ec2e3f09d..06c6ed72c98 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -669,26 +669,16 @@ brw_try_draw_prims(struct gl_context *ctx,
    brw->ctx.NewDriverState |= BRW_NEW_VERTICES;

    for (i = 0; i < nr_prims; i++) {
-      int estimated_max_prim_size;
-      const int sampler_state_size = 16;
-
-      estimated_max_prim_size = 512; /* batchbuffer commands */
-      estimated_max_prim_size += BRW_MAX_TEX_UNIT *
-         (sampler_state_size + sizeof(struct gen5_sampler_default_color));
-      estimated_max_prim_size += 1024; /* gen6 VS push constants */
-      estimated_max_prim_size += 1024; /* gen6 WM push constants */
-      estimated_max_prim_size += 512; /* misc. pad */
-
       /* Flag BRW_NEW_DRAW_CALL on every draw.  This allows us to have
        * atoms that happen on every draw call.
        */
       brw->ctx.NewDriverState |= BRW_NEW_DRAW_CALL;

-      /* Flush the batch if it's approaching full, so that we don't wrap while
-       * we've got validated state that needs to be in the same batch as the
-       * primitives.
+      /* Flush the batch if the batch/state buffers are nearly full.  We can
+       * grow them if needed, but this is not free, so we'd like to avoid it.
        */
-      intel_batchbuffer_require_space(brw, estimated_max_prim_size, RENDER_RING);
+      intel_batchbuffer_require_space(brw, 1500, RENDER_RING);
+      brw_require_statebuffer_space(brw, 2400);
       intel_batchbuffer_save_state(brw);

       if (brw->num_instances != prims[i].num_instances ||
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index c8b71e72de5..9718739dea9 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -185,6 +185,7 @@ void brw_destroy_caches( struct brw_context *brw );
 void brw_print_program_cache(struct brw_context *brw);

 /* intel_batchbuffer.c */
+void brw_require_statebuffer_space(struct brw_context *brw, int size);
 void *brw_state_batch(struct brw_context *brw,
                       int size, int alignment, uint32_t *out_offset);
 uint32_t brw_state_batch_size(struct brw_context *brw, uint32_t offset);
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index 5bff7eaff59..3fe81c7c6a1 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -205,7 +205,6 @@ genX(blorp_exec)(struct blorp_batch *batch,
    assert(batch->blorp->driver_ctx == batch->driver_batch);
    struct brw_context *brw = batch->driver_batch;
    struct gl_context *ctx = &brw->ctx;
-   const uint32_t estimated_max_batch_usage = GEN_GEN >= 8 ? 1920 : 1700;
    bool check_aperture_failed_once = false;

    /* Flush the sampler and render caches.  We definitely need to flush the
@@ -222,7 +221,8 @@ genX(blorp_exec)(struct blorp_batch *batch,
    brw_select_pipeline(brw, BRW_RENDER_PIPELINE);

 retry:
-   intel_batchbuffer_require_space(brw, estimated_max_batch_usage, RENDER_RING);
+   intel_batchbuffer_require_space(brw, 1400, RENDER_RING);
+   brw_require_statebuffer_space(brw, 600);
    intel_batchbuffer_save_state(brw);
    brw->no_batch_wrap = true;

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 5aa34e74293..fddc84fcf9b 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -49,8 +49,8 @@
  * should flush.  Each time we flush the batch, we recreate both buffers
  * at the original target size, so it doesn't grow without bound.
  */
-#define BATCH_SZ (8192*sizeof(uint32_t))
-#define STATE_SZ (8192*sizeof(uint32_t))
+#define BATCH_SZ (20 * 1024)
+#define STATE_SZ (16 * 1024)

 /* The kernel assumes batchbuffers are smaller than 256kB. */
 #define MAX_BATCH_SIZE (256 * 1024)
@@ -369,9 +369,8 @@ intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
       intel_batchbuffer_flush(brw);
    }

-   /* For now, flush as if the batch and state buffers still shared a BO */
    const unsigned batch_used = USED_BATCH(*batch) * 4;
-   if (batch_used + sz >= BATCH_SZ - batch->state_used) {
+   if (batch_used + sz >= BATCH_SZ) {
       if (!brw->no_batch_wrap) {
          intel_batchbuffer_flush(brw);
       } else {
@@ -380,7 +379,7 @@ intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
          grow_buffer(brw, &batch->bo, &batch->map, &batch->batch_cpu_map,
                      batch_used, new_size);
          batch->map_next = (void *) batch->map + batch_used;
-         assert(batch_used + sz < batch->bo->size - batch->state_used);
+         assert(batch_used + sz < batch->bo->size);
       }
    }

@@ -1012,6 +1011,19 @@ brw_state_batch_size(struct brw_context *brw, uint32_t offset)
 }

 /**
+ * Reserve some space in the statebuffer, or flush.
+ *
+ * This is used to estimate when we're near the end of the batch,
+ * so we can flush early.
+ */
+void
+brw_require_statebuffer_space(struct brw_context *brw, int size)
+{
+   if (brw->batch.state_used + size >= STATE_SZ)
+      intel_batchbuffer_flush(brw);
+}
+
+/**
  * Allocates a block of space in the batchbuffer for indirect state.
  */
 void *
@@ -1026,10 +1038,7 @@ brw_state_batch(struct brw_context *brw,

    uint32_t offset = ALIGN(batch->state_used, alignment);

-   /* For now, follow the old flushing behavior. */
-   int batch_space = USED_BATCH(*batch) * 4;
-
-   if (offset + size >= STATE_SZ - batch_space) {
+   if (offset + size >= STATE_SZ) {
       if (!brw->no_batch_wrap) {
          intel_batchbuffer_flush(brw);
          offset = ALIGN(batch->state_used, alignment);
@@ -1039,7 +1048,7 @@ brw_state_batch(struct brw_context *brw,
                                 MAX_STATE_SIZE);
          grow_buffer(brw, &batch->state_bo, &batch->state_map,
                      &batch->state_cpu_map, batch->state_used, new_size);
-         assert(offset + size < batch->state_bo->size - batch_space);
+         assert(offset + size < batch->state_bo->size);
       }
    }
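
As a quick illustration of the scheme this patch moves to, here is a minimal, self-contained sketch. It is not driver code: everything named model_* is hypothetical scaffolding, and only the BATCH_SZ/STATE_SZ targets, the "used + size >= limit" flush checks, and the fixed per-draw estimates from brw_draw.c (1500 bytes of commands, 2400 bytes of state) are taken from the diff above.

#include <stdio.h>

#define BATCH_SZ (20 * 1024)   /* target batchbuffer size, from the patch */
#define STATE_SZ (16 * 1024)   /* target statebuffer size, from the patch */

/* Hypothetical model of the two now-separately-tracked buffers. */
struct model_batch {
   unsigned batch_used;   /* bytes of commands emitted so far */
   unsigned state_used;   /* bytes of indirect state emitted so far */
};

/* A flush submits the work and resets both buffers to empty. */
static void model_flush(struct model_batch *b)
{
   printf("flush: batch %u/%u bytes, state %u/%u bytes\n",
          b->batch_used, (unsigned) BATCH_SZ,
          b->state_used, (unsigned) STATE_SZ);
   b->batch_used = 0;
   b->state_used = 0;
}

/* Like intel_batchbuffer_require_space(): flush early if the command
 * estimate no longer fits, so validated state never spans a wrap. */
static void model_require_batch_space(struct model_batch *b, unsigned sz)
{
   if (b->batch_used + sz >= BATCH_SZ)
      model_flush(b);
}

/* Like the new brw_require_statebuffer_space(): the same check, but
 * against the separate statebuffer limit. */
static void model_require_state_space(struct model_batch *b, unsigned sz)
{
   if (b->state_used + sz >= STATE_SZ)
      model_flush(b);
}

int main(void)
{
   struct model_batch b = { 0, 0 };

   for (int i = 0; i < 32; i++) {
      /* Per-draw reservations, using the estimates from brw_draw.c. */
      model_require_batch_space(&b, 1500);
      model_require_state_space(&b, 2400);

      /* Pretend each draw consumed its full worst-case estimate. */
      b.batch_used += 1500;
      b.state_used += 2400;
   }

   model_flush(&b);
   return 0;
}

With these numbers the statebuffer is the binding constraint (2400 bytes per draw against a 16 kB target), so the model flushes roughly every six draws, while the batchbuffer alone would have lasted thirteen. The real driver also has an escape hatch the model omits: when brw->no_batch_wrap is set, the buffers grow via grow_buffer() instead of flushing, which the patch's new comments note is possible but not free.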