-rw-r--r--   src/mesa/drivers/dri/i965/brw_context.h       |  10
-rw-r--r--   src/mesa/drivers/dri/i965/brw_misc_state.c    |  26
-rw-r--r--   src/mesa/drivers/dri/i965/gen4_blorp_exec.h   |   2
-rw-r--r--   src/mesa/drivers/dri/i965/genX_blorp_exec.c   |  22
-rw-r--r--   src/mesa/drivers/dri/i965/genX_state_upload.c |  31
-rw-r--r--   src/mesa/drivers/dri/i965/intel_batchbuffer.c | 109
-rw-r--r--   src/mesa/drivers/dri/i965/intel_batchbuffer.h |  23
7 files changed, 141 insertions, 82 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 89dacf3874c..6968366d6c8 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -447,6 +447,8 @@ struct intel_batchbuffer {
    struct brw_bo *bo;
    /** Last BO submitted to the hardware.  Used for glFinish(). */
    struct brw_bo *last_bo;
+   /** Current statebuffer being queued up. */
+   struct brw_bo *state_bo;
 
 #ifdef DEBUG
    uint16_t emit, total;
@@ -454,15 +456,18 @@ struct intel_batchbuffer {
    uint16_t reserved_space;
    uint32_t *map_next;
    uint32_t *map;
-   uint32_t *cpu_map;
+   uint32_t *batch_cpu_map;
+   uint32_t *state_cpu_map;
+   uint32_t *state_map;
+   uint32_t state_used;
 
-   uint32_t state_batch_offset;
    enum brw_gpu_ring ring;
    bool use_batch_first;
    bool needs_sol_reset;
    bool state_base_address_emitted;
 
    struct brw_reloc_list batch_relocs;
+   struct brw_reloc_list state_relocs;
    unsigned int valid_reloc_flags;
 
    /** The validation list */
@@ -477,6 +482,7 @@ struct intel_batchbuffer {
    struct {
       uint32_t *map_next;
      int batch_reloc_count;
+      int state_reloc_count;
      int exec_count;
   } saved;
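The struct changes above give the statebuffer its own parallel bookkeeping. As a reading aid only (i965 keeps these as flat fields in struct intel_batchbuffer, not a nested struct), the pairing can be pictured like this illustrative grouping, using only names from the diff:

   /* Illustrative sketch -- this struct is not in the source tree. */
   struct buffer_tracking {
      struct brw_bo *bo;            /* GPU buffer object */
      uint32_t *map;                /* mapping currently being written */
      uint32_t *cpu_map;            /* malloc'd staging copy (non-LLC) */
      struct brw_reloc_list relocs; /* relocs whose source is this BO */
   };
   /* batch: bo, map, batch_cpu_map, batch_relocs (+ map_next cursor)
    * state: state_bo, state_map, state_cpu_map, state_relocs (+ state_used)
    */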
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 9b8ae70f103..53137cc4524 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -65,15 +65,15 @@ upload_pipelined_state_pointers(struct brw_context *brw)
 
    BEGIN_BATCH(7);
    OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
-   OUT_RELOC(brw->batch.bo, 0, brw->vs.base.state_offset);
+   OUT_RELOC(brw->batch.state_bo, 0, brw->vs.base.state_offset);
    if (brw->ff_gs.prog_active)
-      OUT_RELOC(brw->batch.bo, 0, brw->ff_gs.state_offset | 1);
+      OUT_RELOC(brw->batch.state_bo, 0, brw->ff_gs.state_offset | 1);
    else
       OUT_BATCH(0);
-   OUT_RELOC(brw->batch.bo, 0, brw->clip.state_offset | 1);
-   OUT_RELOC(brw->batch.bo, 0, brw->sf.state_offset);
-   OUT_RELOC(brw->batch.bo, 0, brw->wm.base.state_offset);
-   OUT_RELOC(brw->batch.bo, 0, brw->cc.state_offset);
+   OUT_RELOC(brw->batch.state_bo, 0, brw->clip.state_offset | 1);
+   OUT_RELOC(brw->batch.state_bo, 0, brw->sf.state_offset);
+   OUT_RELOC(brw->batch.state_bo, 0, brw->wm.base.state_offset);
+   OUT_RELOC(brw->batch.state_bo, 0, brw->cc.state_offset);
    ADVANCE_BATCH();
 
    brw->ctx.NewDriverState |= BRW_NEW_PSP;
@@ -638,9 +638,9 @@ brw_upload_state_base_address(struct brw_context *brw)
       OUT_BATCH(0);
       OUT_BATCH(mocs_wb << 16);
       /* Surface state base address: */
-      OUT_RELOC64(brw->batch.bo, 0, mocs_wb << 4 | 1);
+      OUT_RELOC64(brw->batch.state_bo, 0, mocs_wb << 4 | 1);
       /* Dynamic state base address: */
-      OUT_RELOC64(brw->batch.bo, 0, mocs_wb << 4 | 1);
+      OUT_RELOC64(brw->batch.state_bo, 0, mocs_wb << 4 | 1);
       /* Indirect object base address: MEDIA_OBJECT data */
       OUT_BATCH(mocs_wb << 4 | 1);
       OUT_BATCH(0);
@@ -650,7 +650,7 @@ brw_upload_state_base_address(struct brw_context *brw)
       /* General state buffer size */
       OUT_BATCH(0xfffff001);
       /* Dynamic state buffer size */
-      OUT_BATCH(ALIGN(brw->batch.bo->size, 4096) | 1);
+      OUT_BATCH(ALIGN(brw->batch.state_bo->size, 4096) | 1);
       /* Indirect object upper bound */
       OUT_BATCH(0xfffff001);
       /* Instruction access upper bound */
@@ -673,7 +673,7 @@ brw_upload_state_base_address(struct brw_context *brw)
        *   BINDING_TABLE_STATE
        *   SURFACE_STATE
        */
-      OUT_RELOC(brw->batch.bo, 0, 1);
+      OUT_RELOC(brw->batch.state_bo, 0, 1);
       /* Dynamic state base address:
        *    SAMPLER_STATE
        *    SAMPLER_BORDER_COLOR_STATE
@@ -684,7 +684,7 @@ brw_upload_state_base_address(struct brw_context *brw)
        *    Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
        *    Disable is clear, which we rely on)
        */
-      OUT_RELOC(brw->batch.bo, 0, 1);
+      OUT_RELOC(brw->batch.state_bo, 0, 1);
 
       OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
@@ -705,7 +705,7 @@ brw_upload_state_base_address(struct brw_context *brw)
       BEGIN_BATCH(8);
       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
       OUT_BATCH(1); /* General state base address */
-      OUT_RELOC(brw->batch.bo, 0, 1); /* Surface state base address */
+      OUT_RELOC(brw->batch.state_bo, 0, 1); /* Surface state base address */
       OUT_BATCH(1); /* Indirect object base address */
       OUT_RELOC(brw->cache.bo, 0, 1); /* Instruction base address */
       OUT_BATCH(0xfffff001); /* General state upper bound */
@@ -716,7 +716,7 @@ brw_upload_state_base_address(struct brw_context *brw)
       BEGIN_BATCH(6);
       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
       OUT_BATCH(1); /* General state base address */
-      OUT_RELOC(brw->batch.bo, 0, 1); /* Surface state base address */
+      OUT_RELOC(brw->batch.state_bo, 0, 1); /* Surface state base address */
       OUT_BATCH(1); /* Indirect object base address */
       OUT_BATCH(1); /* General state upper bound */
       OUT_BATCH(1); /* Indirect object upper bound */
diff --git a/src/mesa/drivers/dri/i965/gen4_blorp_exec.h b/src/mesa/drivers/dri/i965/gen4_blorp_exec.h
index fe02ab52f1f..798496ffffe 100644
--- a/src/mesa/drivers/dri/i965/gen4_blorp_exec.h
+++ b/src/mesa/drivers/dri/i965/gen4_blorp_exec.h
@@ -28,7 +28,7 @@ dynamic_state_address(struct blorp_batch *batch, uint32_t offset)
    struct brw_context *brw = batch->driver_batch;
 
    return (struct blorp_address) {
-      .buffer = brw->batch.bo,
+      .buffer = brw->batch.state_bo,
       .offset = offset,
    };
 }
diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
index d1ff8ab4b73..feb87923ccb 100644
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -57,8 +57,18 @@ blorp_emit_reloc(struct blorp_batch *batch,
 {
    assert(batch->blorp->driver_ctx == batch->driver_batch);
    struct brw_context *brw = batch->driver_batch;
+   uint32_t offset;
+
+   if (GEN_GEN < 6 && brw_ptr_in_state_buffer(&brw->batch, location)) {
+      offset = (char *)location - (char *)brw->batch.state_map;
+      return brw_state_reloc(&brw->batch, offset,
+                             address.buffer, address.offset + delta,
+                             address.reloc_flags);
+   }
+
+   assert(!brw_ptr_in_state_buffer(&brw->batch, location));
 
-   uint32_t offset = (char *)location - (char *)brw->batch.map;
+   offset = (char *)location - (char *)brw->batch.map;
    return brw_batch_reloc(&brw->batch, offset,
                           address.buffer, address.offset + delta,
                           address.reloc_flags);
@@ -76,7 +86,7 @@ blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
       brw_state_reloc(&brw->batch, ss_offset, bo, address.offset + delta,
                       address.reloc_flags);
 
-   void *reloc_ptr = (void *)brw->batch.map + ss_offset;
+   void *reloc_ptr = (void *)brw->batch.state_map + ss_offset;
 #if GEN_GEN >= 8
    *(uint64_t *)reloc_ptr = reloc_val;
 #else
@@ -140,7 +150,7 @@ blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
    void *data = brw_state_batch(brw, size, 64, &offset);
 
    *addr = (struct blorp_address) {
-      .buffer = brw->batch.bo,
+      .buffer = brw->batch.state_bo,
       .offset = offset,
    };
 
@@ -216,7 +226,7 @@ retry:
    intel_batchbuffer_save_state(brw);
    struct brw_bo *saved_bo = brw->batch.bo;
    uint32_t saved_used = USED_BATCH(brw->batch);
-   uint32_t saved_state_batch_offset = brw->batch.state_batch_offset;
+   uint32_t saved_state_used = brw->batch.state_used;
 
 #if GEN_GEN == 6
    /* Emit workaround flushes when we switch from drawing to blorping. */
@@ -249,12 +259,12 @@ retry:
     */
    assert(brw->batch.bo == saved_bo);
    assert((USED_BATCH(brw->batch) - saved_used) * 4 +
-          (saved_state_batch_offset - brw->batch.state_batch_offset) <
+          (brw->batch.state_used - saved_state_used) <
           estimated_max_batch_usage);
    /* Shut up compiler warnings on release build */
    (void)saved_bo;
    (void)saved_used;
-   (void)saved_state_batch_offset;
+   (void)saved_state_used;
 
    /* Check if the blorp op we just did would make our batch likely to fail to
    * map all the BOs into the GPU at batch exec time later.  If so, flush the
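blorp_emit_reloc() above and __gen_combine_address() in genX_state_upload.c below share the same Gen4-5 dispatch: compute the offset relative to whichever mapping contains the CPU pointer, then record the relocation in that buffer's reloc list. A minimal sketch of the shared idea (the helper name emit_reloc_for_ptr is hypothetical; brw_state_reloc/brw_batch_reloc and brw_ptr_in_state_buffer are the real entry points from this diff):

   /* Hypothetical helper, not in the tree -- both call sites open-code
    * this.  A relocation must live in the reloc list of the BO whose
    * contents it patches, so classify `location` first.
    */
   static uint64_t
   emit_reloc_for_ptr(struct intel_batchbuffer *batch, void *location,
                      struct brw_bo *target, uint32_t target_offset,
                      unsigned reloc_flags)
   {
      if (brw_ptr_in_state_buffer(batch, location)) {
         uint32_t offset = (char *)location - (char *)batch->state_map;
         return brw_state_reloc(batch, offset, target, target_offset,
                                reloc_flags);
      } else {
         uint32_t offset = (char *)location - (char *)batch->map;
         return brw_batch_reloc(batch, offset, target, target_offset,
                                reloc_flags);
      }
   }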
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index ea488d72356..612761601a2 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -82,13 +82,23 @@ static uint64_t
 __gen_combine_address(struct brw_context *brw, void *location,
                       struct brw_address address, uint32_t delta)
 {
+   struct intel_batchbuffer *batch = &brw->batch;
+   uint32_t offset;
+
    if (address.bo == NULL) {
       return address.offset + delta;
    } else {
-      uint32_t offset = (char *) location - (char *) brw->batch.map;
+      if (GEN_GEN < 6 && brw_ptr_in_state_buffer(batch, location)) {
+         offset = (char *) location - (char *) brw->batch.state_map;
+         return brw_state_reloc(batch, offset, address.bo,
+                                address.offset + delta,
+                                address.reloc_flags);
+      }
+
+      assert(!brw_ptr_in_state_buffer(batch, location));
 
-      /* TODO: Use brw_state_reloc for some things on Gen4-5 */
-      return brw_batch_reloc(&brw->batch, offset, address.bo,
+      offset = (char *) location - (char *) brw->batch.map;
+      return brw_batch_reloc(batch, offset, address.bo,
                              address.offset + delta,
                              address.reloc_flags);
    }
@@ -1272,7 +1282,7 @@ genX(upload_clip_state)(struct brw_context *brw)
       clip.GuardbandClipTestEnable = true;
 
       clip.ClipperViewportStatePointer =
-         ro_bo(brw->batch.bo, brw->clip.vp_offset);
+         ro_bo(brw->batch.state_bo, brw->clip.vp_offset);
 
       clip.ScreenSpaceViewportXMin = -1;
       clip.ScreenSpaceViewportXMax = 1;
@@ -1488,7 +1498,8 @@ genX(upload_sf)(struct brw_context *brw)
        * something loaded through the GPE (L2 ISC), so it's INSTRUCTION
        * domain.
        */
-      sf.SetupViewportStateOffset = ro_bo(brw->batch.bo, brw->sf.vp_offset);
+      sf.SetupViewportStateOffset =
+         ro_bo(brw->batch.state_bo, brw->sf.vp_offset);
 
       sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
@@ -1781,7 +1792,7 @@ genX(upload_wm)(struct brw_context *brw)
 
       if (stage_state->sampler_count)
          wm.SamplerStatePointer =
-            ro_bo(brw->batch.bo, stage_state->sampler_offset);
+            ro_bo(brw->batch.state_bo, stage_state->sampler_offset);
 #if GEN_GEN == 5
       if (wm_prog_data->prog_offset_2)
          wm.GRFRegisterCount2 = wm_prog_data->reg_blocks_2;
@@ -2074,7 +2085,7 @@ genX(upload_vs_state)(struct brw_context *brw)
       vs.StatisticsEnable = false;
       vs.SamplerStatePointer =
-         ro_bo(brw->batch.bo, stage_state->sampler_offset);
+         ro_bo(brw->batch.state_bo, stage_state->sampler_offset);
 #endif
 
 #if GEN_GEN == 5
@@ -3318,7 +3329,8 @@ genX(upload_color_calc_state)(struct brw_context *brw)
 
       cc.StatisticsEnable = brw->stats_wm;
 
-      cc.CCViewportStatePointer = ro_bo(brw->batch.bo, brw->cc.vp_offset);
+      cc.CCViewportStatePointer =
+         ro_bo(brw->batch.state_bo, brw->cc.vp_offset);
 #else
       /* _NEW_COLOR */
       cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0];
@@ -5010,7 +5022,8 @@ genX(update_sampler_state)(struct brw_context *brw,
                                          &border_color_offset);
       }
 #if GEN_GEN < 6
-      samp_st.BorderColorPointer = ro_bo(brw->batch.bo, border_color_offset);
+      samp_st.BorderColorPointer =
+         ro_bo(brw->batch.state_bo, border_color_offset);
 #else
       samp_st.BorderColorPointer = border_color_offset;
 #endif
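Note how mechanical the genX_state_upload.c side is: every dynamic-state pointer keeps its offset and merely swaps brw->batch.bo for brw->batch.state_bo. ro_bo(), already defined in this file, only packs the pair into a brw_address for __gen_combine_address() to resolve later. Roughly, and as a paraphrase from memory rather than the exact source:

   /* Rough paraphrase of the existing ro_bo() helper -- consult
    * genX_state_upload.c for the authoritative definition.  "ro" =
    * read-only: no write flag is set in reloc_flags.
    */
   static struct brw_address
   ro_bo(struct brw_bo *bo, uint64_t offset)
   {
      return (struct brw_address) {
         .bo = bo,
         .offset = offset,
      };
   }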
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 8080e77b251..074bb74f99f 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -41,6 +41,7 @@
 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
 
 #define BATCH_SZ (8192*sizeof(uint32_t))
+#define STATE_SZ (8192*sizeof(uint32_t))
 
 static void
 intel_batchbuffer_reset(struct intel_batchbuffer *batch,
@@ -74,12 +75,15 @@ intel_batchbuffer_init(struct intel_screen *screen,
    const struct gen_device_info *devinfo = &screen->devinfo;
 
    if (!devinfo->has_llc) {
-      batch->cpu_map = malloc(BATCH_SZ);
-      batch->map = batch->cpu_map;
-      batch->map_next = batch->cpu_map;
+      batch->batch_cpu_map = malloc(BATCH_SZ);
+      batch->map = batch->batch_cpu_map;
+      batch->map_next = batch->map;
+      batch->state_cpu_map = malloc(STATE_SZ);
+      batch->state_map = batch->state_cpu_map;
    }
 
    init_reloc_list(&batch->batch_relocs, 250);
+   init_reloc_list(&batch->state_relocs, 250);
 
    batch->exec_count = 0;
    batch->exec_array_size = 100;
@@ -161,16 +165,28 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch,
    batch->last_bo = batch->bo;
 
    batch->bo = brw_bo_alloc(bufmgr, "batchbuffer", BATCH_SZ, 4096);
-   if (devinfo->has_llc) {
+   if (!batch->batch_cpu_map) {
       batch->map = brw_bo_map(NULL, batch->bo, MAP_READ | MAP_WRITE);
    }
    batch->map_next = batch->map;
 
+   batch->state_bo = brw_bo_alloc(bufmgr, "statebuffer", STATE_SZ, 4096);
+   batch->state_bo->kflags =
+      can_do_exec_capture(screen) ? EXEC_OBJECT_CAPTURE : 0;
+   if (!batch->state_cpu_map) {
+      batch->state_map =
+         brw_bo_map(NULL, batch->state_bo, MAP_READ | MAP_WRITE);
+   }
+
+   /* Avoid making 0 a valid state offset - otherwise the decoder will try
+    * and decode data when we use offset 0 as a null pointer.
+    */
+   batch->state_used = 1;
+
    add_exec_bo(batch, batch->bo);
    assert(batch->bo->index == 0);
 
    batch->reserved_space = BATCH_RESERVED;
-   batch->state_batch_offset = batch->bo->size;
    batch->needs_sol_reset = false;
    batch->state_base_address_emitted = false;
@@ -195,6 +211,7 @@ intel_batchbuffer_save_state(struct brw_context *brw)
 {
    brw->batch.saved.map_next = brw->batch.map_next;
    brw->batch.saved.batch_reloc_count = brw->batch.batch_relocs.reloc_count;
+   brw->batch.saved.state_reloc_count = brw->batch.state_relocs.reloc_count;
    brw->batch.saved.exec_count = brw->batch.exec_count;
 }
 
@@ -206,6 +223,7 @@ intel_batchbuffer_reset_to_saved(struct brw_context *brw)
       brw_bo_unreference(brw->batch.exec_bos[i]);
    }
    brw->batch.batch_relocs.reloc_count = brw->batch.saved.batch_reloc_count;
+   brw->batch.state_relocs.reloc_count = brw->batch.saved.state_reloc_count;
    brw->batch.exec_count = brw->batch.saved.exec_count;
 
    brw->batch.map_next = brw->batch.saved.map_next;
@@ -216,17 +234,20 @@ intel_batchbuffer_reset_to_saved(struct brw_context *brw)
 void
 intel_batchbuffer_free(struct intel_batchbuffer *batch)
 {
-   free(batch->cpu_map);
+   free(batch->batch_cpu_map);
+   free(batch->state_cpu_map);
 
    for (int i = 0; i < batch->exec_count; i++) {
      brw_bo_unreference(batch->exec_bos[i]);
   }
    free(batch->batch_relocs.relocs);
+   free(batch->state_relocs.relocs);
    free(batch->exec_bos);
    free(batch->validation_list);
 
    brw_bo_unreference(batch->last_bo);
    brw_bo_unreference(batch->bo);
+   brw_bo_unreference(batch->state_bo);
    if (batch->state_batch_sizes)
       _mesa_hash_table_destroy(batch->state_batch_sizes, NULL);
 }
@@ -236,6 +257,7 @@ intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
                                 enum brw_gpu_ring ring)
 {
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   struct intel_batchbuffer *batch = &brw->batch;
 
    /* If we're switching rings, implicitly flush the batch. */
    if (unlikely(ring != brw->batch.ring) && brw->batch.ring != UNKNOWN_RING &&
@@ -243,7 +265,9 @@ intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
       intel_batchbuffer_flush(brw);
    }
 
-   if (intel_batchbuffer_space(&brw->batch) < sz)
+   /* For now, flush as if the batch and state buffers still shared a BO */
+   if (USED_BATCH(*batch) * 4 + sz >=
+       BATCH_SZ - batch->reserved_space - batch->state_used)
       intel_batchbuffer_flush(brw);
 
    /* The intel_batchbuffer_flush() calls above might have changed
@@ -301,7 +325,7 @@ do_batch_dump(struct brw_context *brw)
       return;
 
    uint32_t *batch_data = brw_bo_map(brw, batch->bo, MAP_READ);
-   uint32_t *state = batch_data;
+   uint32_t *state = brw_bo_map(brw, batch->state_bo, MAP_READ);
    if (batch == NULL || state == NULL) {
      fprintf(stderr, "WARNING: failed to map batchbuffer/statebuffer\n");
      return;
@@ -309,7 +333,7 @@ do_batch_dump(struct brw_context *brw)
 
    uint32_t *end = batch_data + USED_BATCH(*batch);
    uint32_t batch_gtt_offset = batch->bo->gtt_offset;
-   uint32_t state_gtt_offset = batch->bo->gtt_offset;
+   uint32_t state_gtt_offset = batch->state_bo->gtt_offset;
    int length;
 
    bool color = INTEL_DEBUG & DEBUG_COLOR;
@@ -431,6 +455,7 @@ do_batch_dump(struct brw_context *brw)
    }
 
    brw_bo_unmap(batch->bo);
+   brw_bo_unmap(batch->state_bo);
 }
 #else
 static void do_batch_dump(struct brw_context *brw) { }
@@ -448,9 +473,12 @@ brw_new_batch(struct brw_context *brw)
       brw->batch.exec_bos[i] = NULL;
    }
    brw->batch.batch_relocs.reloc_count = 0;
+   brw->batch.state_relocs.reloc_count = 0;
    brw->batch.exec_count = 0;
    brw->batch.aperture_space = 0;
 
+   brw_bo_unreference(brw->batch.state_bo);
+
    /* Create a new batchbuffer and reset the associated state: */
    intel_batchbuffer_reset_and_clear_render_cache(brw);
 
@@ -632,15 +660,18 @@ do_flush_locked(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
    struct intel_batchbuffer *batch = &brw->batch;
    int ret = 0;
 
-   if (batch->cpu_map) {
+   if (batch->batch_cpu_map) {
       void *bo_map = brw_bo_map(brw, batch->bo, MAP_WRITE);
-      memcpy(bo_map, batch->cpu_map, 4 * USED_BATCH(*batch));
-      memcpy(bo_map + batch->state_batch_offset,
-             (char *) batch->cpu_map + batch->state_batch_offset,
-             batch->bo->size - batch->state_batch_offset);
+      memcpy(bo_map, batch->batch_cpu_map, 4 * USED_BATCH(*batch));
+   }
+
+   if (batch->state_cpu_map) {
+      void *bo_map = brw_bo_map(brw, batch->state_bo, MAP_WRITE);
+      memcpy(bo_map, batch->state_cpu_map, batch->state_used);
    }
 
    brw_bo_unmap(batch->bo);
+   brw_bo_unmap(batch->state_bo);
 
    if (!brw->screen->no_hw) {
      /* The requirement for using I915_EXEC_NO_RELOC are:
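As the "flush as if the batch and state buffers still shared a BO" comment says, the flush check in intel_batchbuffer_require_space() deliberately preserves the old behavior: BATCH_SZ and STATE_SZ are both 8192 * sizeof(uint32_t) = 32 kB, and commands plus state are still counted against a single 32 kB budget so the split cannot move flush points. A sketch of the invariant both allocators enforce (space_left is a hypothetical convenience function, not in the tree):

   /* Hypothetical helper summarizing the shared budget: command bytes,
    * state bytes, and the reserved pad for the final flush commands all
    * count against one BATCH_SZ-sized budget.
    */
   static int
   space_left(struct intel_batchbuffer *batch)
   {
      return BATCH_SZ - batch->reserved_space
                      - USED_BATCH(*batch) * 4   /* command bytes */
                      - batch->state_used;       /* state bytes */
   }
   /* intel_batchbuffer_require_space(brw, sz, ring) flushes when
    * sz >= space_left(); brw_state_batch() (below) flushes when an
    * aligned allocation of `size` bytes would no longer fit.
    */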
@@ -667,6 +698,18 @@ do_flush_locked(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
 
       uint32_t hw_ctx = batch->ring == RENDER_RING ? brw->hw_ctx : 0;
 
+      /* Set statebuffer relocations */
+      const unsigned state_index = batch->state_bo->index;
+      if (state_index < batch->exec_count &&
+          batch->exec_bos[state_index] == batch->state_bo) {
+         struct drm_i915_gem_exec_object2 *entry =
+            &batch->validation_list[state_index];
+         assert(entry->handle == batch->state_bo->gem_handle);
+         entry->relocation_count = batch->state_relocs.reloc_count;
+         entry->relocs_ptr = (uintptr_t) batch->state_relocs.relocs;
+      }
+
+      /* Set batchbuffer relocations */
       struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[0];
       assert(entry->handle == batch->bo->gem_handle);
       entry->relocation_count = batch->batch_relocs.reloc_count;
@@ -729,13 +772,11 @@ _intel_batchbuffer_flush_fence(struct brw_context *brw,
 
    if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT))) {
       int bytes_for_commands = 4 * USED_BATCH(brw->batch);
-      int bytes_for_state = brw->batch.bo->size - brw->batch.state_batch_offset;
-      int total_bytes = bytes_for_commands + bytes_for_state;
-      fprintf(stderr, "%s:%d: Batchbuffer flush with %4db (pkt) + "
-              "%4db (state) = %4db (%0.1f%%)\n", file, line,
-              bytes_for_commands, bytes_for_state,
-              total_bytes,
-              100.0f * total_bytes / BATCH_SZ);
+      int bytes_for_state = brw->batch.state_used;
+      fprintf(stderr, "%s:%d: Batchbuffer flush with %4db (%0.1f%%) (pkt) + "
+              "%4db (%0.1f%%) (state)\n", file, line,
+              bytes_for_commands, 100.0f * bytes_for_commands / BATCH_SZ,
+              bytes_for_state, 100.0f * bytes_for_state / STATE_SZ);
    }
 
    brw->batch.reserved_space = 0;
@@ -842,9 +883,9 @@ brw_state_reloc(struct intel_batchbuffer *batch, uint32_t state_offset,
                 struct brw_bo *target, uint32_t target_offset,
                 unsigned int reloc_flags)
 {
-   assert(state_offset <= batch->bo->size - sizeof(uint32_t));
+   assert(state_offset <= batch->state_bo->size - sizeof(uint32_t));
 
-   return emit_reloc(batch, &batch->batch_relocs, state_offset,
+   return emit_reloc(batch, &batch->state_relocs, state_offset,
                      target, target_offset, reloc_flags);
 }
@@ -868,31 +909,29 @@ brw_state_batch(struct brw_context *brw,
                 uint32_t *out_offset)
 {
    struct intel_batchbuffer *batch = &brw->batch;
-   uint32_t offset;
 
    assert(size < batch->bo->size);
-   offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
 
-   /* If allocating from the top would wrap below the batchbuffer, or
-    * if the batch's used space (plus the reserved pad) collides with our
-    * space, then flush and try again.
-    */
-   if (batch->state_batch_offset < size ||
-       offset < 4 * USED_BATCH(*batch) + batch->reserved_space) {
+   uint32_t offset = ALIGN(batch->state_used, alignment);
+
+   /* For now, follow the old flushing behavior. */
+   int batch_space = batch->reserved_space + USED_BATCH(*batch) * 4;
+
+   if (offset + size >= STATE_SZ - batch_space) {
       intel_batchbuffer_flush(brw);
-      offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
+      offset = ALIGN(batch->state_used, alignment);
    }
 
-   batch->state_batch_offset = offset;
-
    if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
      _mesa_hash_table_insert(batch->state_batch_sizes,
                              (void *) (uintptr_t) offset,
                              (void *) (uintptr_t) size);
   }
 
+   batch->state_used = offset + size;
+
    *out_offset = offset;
-   return batch->map + (offset>>2);
+   return batch->state_map + (offset >> 2);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index 6eb1014c1cb..8a2e3cfc9bb 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -95,25 +95,9 @@ static inline uint32_t float_as_int(float f)
    return fi.d;
 }
 
-/* Inline functions - might actually be better off with these
- * non-inlined.  Certainly better off switching all command packets to
- * be passed as structs rather than dwords, but that's a little bit of
- * work...
- */
-static inline unsigned
-intel_batchbuffer_space(struct intel_batchbuffer *batch)
-{
-   return (batch->state_batch_offset - batch->reserved_space)
-      - USED_BATCH(*batch) * 4;
-}
-
-
 static inline void
 intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword)
 {
-#ifdef DEBUG
-   assert(intel_batchbuffer_space(batch) >= 4);
-#endif
    *batch->map_next++ = dword;
    assert(batch->ring != UNKNOWN_RING);
 }
@@ -153,6 +137,13 @@ intel_batchbuffer_advance(struct brw_context *brw)
 #endif
 }
 
+static inline bool
+brw_ptr_in_state_buffer(struct intel_batchbuffer *batch, void *p)
+{
+   return (char *) p >= (char *) batch->state_map &&
+          (char *) p < (char *) batch->state_map + batch->state_bo->size;
+}
+
 #define BEGIN_BATCH(n) do {                            \
    intel_batchbuffer_begin(brw, (n), RENDER_RING);     \
    uint32_t *__map = brw->batch.map_next;              \
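With the split, dynamic state is no longer carved downward from the top of the batch BO: brw_state_batch() now bumps state_used upward inside its own statebuffer, and brw_ptr_in_state_buffer() gives emitters a cheap pointer classification. A hedged usage sketch of the API as it appears in this diff (the caller shown is hypothetical):

   /* Hypothetical caller: carve 64 aligned bytes of dynamic state,
    * fill it on the CPU, then reference it from a packet.
    */
   uint32_t offset;
   uint32_t *data = brw_state_batch(brw, 64, 64, &offset);
   memset(data, 0, 64);            /* CPU writes land in batch.state_map */

   /* The pointer classifies as statebuffer, not batchbuffer: */
   assert(brw_ptr_in_state_buffer(&brw->batch, data));

   /* `offset` is a byte offset into brw->batch.state_bo: on Gen4-5 a
    * packet references it as ro_bo(brw->batch.state_bo, offset); on
    * Gen6+ it is relative to the dynamic state base address, which
    * STATE_BASE_ADDRESS now points at state_bo.
    */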