diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_draw_upload.c | 12 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_urb.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_batchbuffer.c | 19 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_batchbuffer.h | 55 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_blit.c | 19 |
6 files changed, 70 insertions, 42 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 44d1aeaf54a..34a49b2abdc 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -873,7 +873,8 @@ struct intel_batchbuffer { #ifdef DEBUG uint16_t emit, total; #endif - uint16_t used, reserved_space; + uint16_t reserved_space; + uint32_t *map_next; uint32_t *map; uint32_t *cpu_map; #define BATCH_SZ (8192*sizeof(uint32_t)) @@ -883,7 +884,7 @@ struct intel_batchbuffer { bool needs_sol_reset; struct { - uint16_t used; + uint32_t *map_next; int reloc_count; } saved; }; diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index e0b55c6b620..c95f0c37f89 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -604,14 +604,15 @@ brw_prepare_shader_draw_parameters(struct brw_context *brw) /** * Emit a VERTEX_BUFFER_STATE entry (part of 3DSTATE_VERTEX_BUFFERS). */ -static void +static uint32_t * emit_vertex_buffer_state(struct brw_context *brw, unsigned buffer_nr, drm_intel_bo *bo, unsigned bo_ending_address, unsigned bo_offset, unsigned stride, - unsigned step_rate) + unsigned step_rate, + uint32_t *__map) { struct gl_context *ctx = &brw->ctx; uint32_t dw0; @@ -643,7 +644,10 @@ emit_vertex_buffer_state(struct brw_context *brw, OUT_BATCH(0); } OUT_BATCH(step_rate); + + return __map; } +#define EMIT_VERTEX_BUFFER_STATE(...) __map = emit_vertex_buffer_state(__VA_ARGS__, __map) static void brw_emit_vertices(struct brw_context *brw) { @@ -704,14 +708,14 @@ static void brw_emit_vertices(struct brw_context *brw) OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1)); for (i = 0; i < brw->vb.nr_buffers; i++) { struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; - emit_vertex_buffer_state(brw, i, buffer->bo, buffer->bo->size - 1, + EMIT_VERTEX_BUFFER_STATE(brw, i, buffer->bo, buffer->bo->size - 1, buffer->offset, buffer->stride, buffer->step_rate); } if (brw->vs.prog_data->uses_vertexid) { - emit_vertex_buffer_state(brw, brw->vb.nr_buffers, + EMIT_VERTEX_BUFFER_STATE(brw, brw->vb.nr_buffers, brw->draw.draw_params_bo, brw->draw.draw_params_bo->size - 1, brw->draw.draw_params_offset, diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c index 8fc06ba7cd9..6078c3810d4 100644 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -252,7 +252,7 @@ void brw_upload_urb_fence(struct brw_context *brw) if ((USED_BATCH(brw->batch) & 15) > 12) { int pad = 16 - (USED_BATCH(brw->batch) & 15); do - brw->batch.map[brw->batch.used++] = MI_NOOP; + *brw->batch.map_next++ = MI_NOOP; while (--pad); } diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index 628a7b774b2..088ffd276b4 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -48,6 +48,7 @@ intel_batchbuffer_init(struct brw_context *brw) if (!brw->has_llc) { brw->batch.cpu_map = malloc(BATCH_SZ); brw->batch.map = brw->batch.cpu_map; + brw->batch.map_next = brw->batch.cpu_map; } } @@ -68,10 +69,10 @@ intel_batchbuffer_reset(struct brw_context *brw) drm_intel_bo_map(brw->batch.bo, true); brw->batch.map = brw->batch.bo->virtual; } + brw->batch.map_next = brw->batch.map; brw->batch.reserved_space = BATCH_RESERVED; brw->batch.state_batch_offset = brw->batch.bo->size; - brw->batch.used = 0; brw->batch.needs_sol_reset = false; /* We don't know what ring the new batch will be sent to until we see the @@ -83,7 +84,7 @@ intel_batchbuffer_reset(struct brw_context *brw) void intel_batchbuffer_save_state(struct brw_context *brw) { - brw->batch.saved.used = brw->batch.used; + brw->batch.saved.map_next = brw->batch.map_next; brw->batch.saved.reloc_count = drm_intel_gem_bo_get_reloc_count(brw->batch.bo); } @@ -93,7 +94,7 @@ intel_batchbuffer_reset_to_saved(struct brw_context *brw) { drm_intel_gem_bo_clear_relocs(brw->batch.bo, brw->batch.saved.reloc_count); - brw->batch.used = brw->batch.saved.used; + brw->batch.map_next = brw->batch.saved.map_next; if (USED_BATCH(brw->batch) == 0) brw->batch.ring = UNKNOWN_RING; } @@ -395,13 +396,13 @@ _intel_batchbuffer_flush(struct brw_context *brw, */ uint32_t intel_batchbuffer_reloc(struct brw_context *brw, - drm_intel_bo *buffer, + drm_intel_bo *buffer, uint32_t offset, uint32_t read_domains, uint32_t write_domain, uint32_t delta) { int ret; - ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used, + ret = drm_intel_bo_emit_reloc(brw->batch.bo, offset, buffer, delta, read_domains, write_domain); assert(ret == 0); @@ -416,11 +417,11 @@ intel_batchbuffer_reloc(struct brw_context *brw, uint64_t intel_batchbuffer_reloc64(struct brw_context *brw, - drm_intel_bo *buffer, + drm_intel_bo *buffer, uint32_t offset, uint32_t read_domains, uint32_t write_domain, uint32_t delta) { - int ret = drm_intel_bo_emit_reloc(brw->batch.bo, 4*brw->batch.used, + int ret = drm_intel_bo_emit_reloc(brw->batch.bo, offset, buffer, delta, read_domains, write_domain); assert(ret == 0); @@ -440,8 +441,8 @@ intel_batchbuffer_data(struct brw_context *brw, { assert((bytes & 3) == 0); intel_batchbuffer_require_space(brw, bytes, ring); - memcpy(brw->batch.map + brw->batch.used, data, bytes); - brw->batch.used += bytes >> 2; + memcpy(brw->batch.map_next, data, bytes); + brw->batch.map_next += bytes >> 2; } static void diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index 12d20d363c3..84add927c9a 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -59,16 +59,18 @@ void intel_batchbuffer_data(struct brw_context *brw, uint32_t intel_batchbuffer_reloc(struct brw_context *brw, drm_intel_bo *buffer, + uint32_t offset, uint32_t read_domains, uint32_t write_domain, - uint32_t offset); + uint32_t delta); uint64_t intel_batchbuffer_reloc64(struct brw_context *brw, drm_intel_bo *buffer, + uint32_t offset, uint32_t read_domains, uint32_t write_domain, - uint32_t offset); + uint32_t delta); -#define USED_BATCH(batch) ((batch).used) +#define USED_BATCH(batch) ((uintptr_t)((batch).map_next - (batch).map)) static inline uint32_t float_as_int(float f) { @@ -100,7 +102,7 @@ intel_batchbuffer_emit_dword(struct brw_context *brw, GLuint dword) #ifdef DEBUG assert(intel_batchbuffer_space(brw) >= 4); #endif - brw->batch.map[brw->batch.used++] = dword; + *brw->batch.map_next++ = dword; assert(brw->batch.ring != UNKNOWN_RING); } @@ -163,23 +165,42 @@ intel_batchbuffer_advance(struct brw_context *brw) #endif } -#define BEGIN_BATCH(n) intel_batchbuffer_begin(brw, n, RENDER_RING) -#define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(brw, n, BLT_RING) -#define OUT_BATCH(d) intel_batchbuffer_emit_dword(brw, d) -#define OUT_BATCH_F(f) intel_batchbuffer_emit_float(brw, f) -#define OUT_RELOC(buf, read_domains, write_domain, delta) \ - OUT_BATCH(intel_batchbuffer_reloc(brw, buf, read_domains, write_domain, \ - delta)) +#define BEGIN_BATCH(n) do { \ + intel_batchbuffer_begin(brw, (n), RENDER_RING); \ + uint32_t *__map = brw->batch.map_next; \ + brw->batch.map_next += (n) + +#define BEGIN_BATCH_BLT(n) do { \ + intel_batchbuffer_begin(brw, (n), BLT_RING); \ + uint32_t *__map = brw->batch.map_next; \ + brw->batch.map_next += (n) + +#define OUT_BATCH(d) *__map++ = (d) +#define OUT_BATCH_F(f) OUT_BATCH(float_as_int((f))) + +#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ + uint32_t __offset = (__map - brw->batch.map) * 4; \ + OUT_BATCH(intel_batchbuffer_reloc(brw, (buf), __offset, \ + (read_domains), \ + (write_domain), \ + (delta))); \ +} while (0) /* Handle 48-bit address relocations for Gen8+ */ -#define OUT_RELOC64(buf, read_domains, write_domain, delta) do { \ - uint64_t reloc64 = intel_batchbuffer_reloc64(brw, buf, read_domains, \ - write_domain, delta); \ - OUT_BATCH(reloc64); \ - OUT_BATCH(reloc64 >> 32); \ +#define OUT_RELOC64(buf, read_domains, write_domain, delta) do { \ + uint32_t __offset = (__map - brw->batch.map) * 4; \ + uint64_t reloc64 = intel_batchbuffer_reloc64(brw, (buf), __offset, \ + (read_domains), \ + (write_domain), \ + (delta)); \ + OUT_BATCH(reloc64); \ + OUT_BATCH(reloc64 >> 32); \ } while (0) -#define ADVANCE_BATCH() intel_batchbuffer_advance(brw); +#define ADVANCE_BATCH() \ + assert(__map == brw->batch.map_next); \ + intel_batchbuffer_advance(brw); \ +} while (0) #ifdef __cplusplus } diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c index bc390535c86..4fc3fa803cb 100644 --- a/src/mesa/drivers/dri/i965/intel_blit.c +++ b/src/mesa/drivers/dri/i965/intel_blit.c @@ -176,9 +176,10 @@ get_tr_vertical_align(uint32_t tr_mode, uint32_t cpp, bool is_src) { * tiling state would leak into other unsuspecting applications (like the X * server). */ -static void +static uint32_t * set_blitter_tiling(struct brw_context *brw, - bool dst_y_tiled, bool src_y_tiled) + bool dst_y_tiled, bool src_y_tiled, + uint32_t *__map) { assert(brw->gen >= 6); @@ -193,19 +194,19 @@ set_blitter_tiling(struct brw_context *brw, OUT_BATCH((BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y) << 16 | (dst_y_tiled ? BCS_SWCTRL_DST_Y : 0) | (src_y_tiled ? BCS_SWCTRL_SRC_Y : 0)); + return __map; } +#define SET_BLITTER_TILING(...) __map = set_blitter_tiling(__VA_ARGS__, __map) -#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) do { \ +#define BEGIN_BATCH_BLT_TILED(n, dst_y_tiled, src_y_tiled) \ BEGIN_BATCH_BLT(n + ((dst_y_tiled || src_y_tiled) ? 14 : 0)); \ if (dst_y_tiled || src_y_tiled) \ - set_blitter_tiling(brw, dst_y_tiled, src_y_tiled); \ - } while (0) + SET_BLITTER_TILING(brw, dst_y_tiled, src_y_tiled) -#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) do { \ +#define ADVANCE_BATCH_TILED(dst_y_tiled, src_y_tiled) \ if (dst_y_tiled || src_y_tiled) \ - set_blitter_tiling(brw, false, false); \ - ADVANCE_BATCH(); \ - } while (0) + SET_BLITTER_TILING(brw, false, false); \ + ADVANCE_BATCH() static int blt_pitch(struct intel_mipmap_tree *mt) |