diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.sources | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.c | 14 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 14 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_draw.c | 38 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/hsw_sol.c | 263 |
5 files changed, 318 insertions, 12 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 8c60954ee64..d35775ea3ad 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -228,6 +228,7 @@ i965_FILES = \ gen8_vs_state.c \ gen8_wm_depth_stencil.c \ hsw_queryobj.c \ + hsw_sol.c \ intel_batchbuffer.c \ intel_batchbuffer.h \ intel_blit.c \ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 1380d4138cf..26514a0c7b6 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -372,13 +372,18 @@ brw_init_driver_functions(struct brw_context *brw, functions->NewTransformFeedback = brw_new_transform_feedback; functions->DeleteTransformFeedback = brw_delete_transform_feedback; - functions->GetTransformFeedbackVertexCount = - brw_get_transform_feedback_vertex_count; - if (brw->gen >= 7) { + if (brw->intelScreen->has_mi_math_and_lrr) { + functions->BeginTransformFeedback = hsw_begin_transform_feedback; + functions->EndTransformFeedback = hsw_end_transform_feedback; + functions->PauseTransformFeedback = hsw_pause_transform_feedback; + functions->ResumeTransformFeedback = hsw_resume_transform_feedback; + } else if (brw->gen >= 7) { functions->BeginTransformFeedback = gen7_begin_transform_feedback; functions->EndTransformFeedback = gen7_end_transform_feedback; functions->PauseTransformFeedback = gen7_pause_transform_feedback; functions->ResumeTransformFeedback = gen7_resume_transform_feedback; + functions->GetTransformFeedbackVertexCount = + brw_get_transform_feedback_vertex_count; } else { functions->BeginTransformFeedback = brw_begin_transform_feedback; functions->EndTransformFeedback = brw_end_transform_feedback; @@ -494,7 +499,8 @@ brw_initialize_context_constants(struct brw_context *brw) ctx->Const.MaxTransformFeedbackSeparateComponents = BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS; - ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true; + ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = + !brw->intelScreen->has_mi_math_and_lrr; int max_samples; const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index b620f14b39f..035cbe9d325 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1656,6 +1656,20 @@ void gen7_resume_transform_feedback(struct gl_context *ctx, struct gl_transform_feedback_object *obj); +/* hsw_sol.c */ +void +hsw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, + struct gl_transform_feedback_object *obj); +void +hsw_end_transform_feedback(struct gl_context *ctx, + struct gl_transform_feedback_object *obj); +void +hsw_pause_transform_feedback(struct gl_context *ctx, + struct gl_transform_feedback_object *obj); +void +hsw_resume_transform_feedback(struct gl_context *ctx, + struct gl_transform_feedback_object *obj); + /* brw_blorp_blit.cpp */ GLbitfield brw_blorp_framebuffer(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index afa8a4e9eae..9d034cfdb33 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -153,7 +153,9 @@ trim(GLenum prim, GLuint length) static void brw_emit_prim(struct brw_context *brw, const struct _mesa_prim *prim, - uint32_t hw_prim) + uint32_t hw_prim, + struct brw_transform_feedback_object *xfb_obj, + unsigned stream) { int verts_per_instance; int vertex_access_type; @@ -185,7 +187,7 @@ brw_emit_prim(struct brw_context *brw, verts_per_instance = prim->count; /* If nothing to emit, just return. */ - if (verts_per_instance == 0 && !prim->is_indirect) + if (verts_per_instance == 0 && !prim->is_indirect && !xfb_obj) return; /* If we're set to always flush, do it before and after the primitive emit. @@ -197,7 +199,25 @@ brw_emit_prim(struct brw_context *brw, brw_emit_mi_flush(brw); /* If indirect, emit a bunch of loads from the indirect BO. */ - if (prim->is_indirect) { + if (xfb_obj) { + indirect_flag = GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE; + + brw_load_register_mem(brw, GEN7_3DPRIM_VERTEX_COUNT, + xfb_obj->prim_count_bo, + I915_GEM_DOMAIN_VERTEX, 0, + stream * sizeof(uint32_t)); + BEGIN_BATCH(9); + OUT_BATCH(MI_LOAD_REGISTER_IMM | (9 - 2)); + OUT_BATCH(GEN7_3DPRIM_INSTANCE_COUNT); + OUT_BATCH(prim->num_instances); + OUT_BATCH(GEN7_3DPRIM_START_VERTEX); + OUT_BATCH(0); + OUT_BATCH(GEN7_3DPRIM_BASE_VERTEX); + OUT_BATCH(0); + OUT_BATCH(GEN7_3DPRIM_START_INSTANCE); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else if (prim->is_indirect) { struct gl_buffer_object *indirect_buffer = brw->ctx.DrawIndirectBuffer; drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_buffer_object(indirect_buffer), @@ -382,6 +402,8 @@ brw_try_draw_prims(struct gl_context *ctx, const struct _mesa_index_buffer *ib, GLuint min_index, GLuint max_index, + struct brw_transform_feedback_object *xfb_obj, + unsigned stream, struct gl_buffer_object *indirect) { struct brw_context *brw = brw_context(ctx); @@ -531,7 +553,7 @@ retry: brw_upload_render_state(brw); } - brw_emit_prim(brw, &prims[i], brw->primitive); + brw_emit_prim(brw, &prims[i], brw->primitive, xfb_obj, stream); brw->no_batch_wrap = false; @@ -573,14 +595,14 @@ brw_draw_prims(struct gl_context *ctx, GLboolean index_bounds_valid, GLuint min_index, GLuint max_index, - struct gl_transform_feedback_object *unused_tfb_object, + struct gl_transform_feedback_object *gl_xfb_obj, unsigned stream, struct gl_buffer_object *indirect) { struct brw_context *brw = brw_context(ctx); const struct gl_client_array **arrays = ctx->Array._DrawArrays; - - assert(unused_tfb_object == NULL); + struct brw_transform_feedback_object *xfb_obj = + (struct brw_transform_feedback_object *) gl_xfb_obj; if (!brw_check_conditional_render(brw)) return; @@ -619,7 +641,7 @@ brw_draw_prims(struct gl_context *ctx, * to it. */ brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, min_index, max_index, - indirect); + xfb_obj, stream, indirect); } void diff --git a/src/mesa/drivers/dri/i965/hsw_sol.c b/src/mesa/drivers/dri/i965/hsw_sol.c new file mode 100644 index 00000000000..ef8fcf48346 --- /dev/null +++ b/src/mesa/drivers/dri/i965/hsw_sol.c @@ -0,0 +1,263 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * An implementation of the transform feedback driver hooks for Haswell + * and later hardware. This uses MI_MATH to compute the number of vertices + * written (for use by DrawTransformFeedback()) without any CPU<->GPU + * synchronization which could stall. + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" +#include "intel_buffer_objects.h" +#include "main/transformfeedback.h" + +/** + * We store several values in obj->prim_count_bo: + * + * [4x 32-bit values]: Final Number of Vertices Written + * [4x 32-bit values]: Tally of Primitives Written So Far + * [4x 64-bit values]: Starting SO_NUM_PRIMS_WRITTEN Counter Snapshots + * + * The first set of values is used by DrawTransformFeedback(), which + * copies one of them into the 3DPRIM_VERTEX_COUNT register and performs + * an indirect draw. The other values are just temporary storage. + */ + +#define TALLY_OFFSET (BRW_MAX_XFB_STREAMS * sizeof(uint32_t)) +#define START_OFFSET (TALLY_OFFSET * 2) + +/** + * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values) + * to prim_count_bo. + */ +static void +save_prim_start_values(struct brw_context *brw, + struct brw_transform_feedback_object *obj) +{ + /* Flush any drawing so that the counters have the right values. */ + brw_emit_mi_flush(brw); + + /* Emit MI_STORE_REGISTER_MEM commands to write the values. */ + for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { + brw_store_register_mem64(brw, obj->prim_count_bo, + GEN7_SO_NUM_PRIMS_WRITTEN(i), + START_OFFSET + i * sizeof(uint64_t)); + } +} + +/** + * Compute the number of primitives written during our most recent + * transform feedback activity (the current SO_NUM_PRIMS_WRITTEN value + * minus the stashed "start" value), and add it to our running tally. + * + * If \p finalize is true, also compute the number of vertices written + * (by multiplying by the number of vertices per primitive), and store + * that to the "final" location. + * + * Otherwise, just overwrite the old tally with the new one. + */ +static void +tally_prims_written(struct brw_context *brw, + struct brw_transform_feedback_object *obj, + bool finalize) +{ + /* Flush any drawing so that the counters have the right values. */ + brw_emit_mi_flush(brw); + + for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { + /* GPR0 = Tally */ + brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0); + brw_load_register_mem(brw, HSW_CS_GPR(0), obj->prim_count_bo, + I915_GEM_DOMAIN_INSTRUCTION, + I915_GEM_DOMAIN_INSTRUCTION, + TALLY_OFFSET + i * sizeof(uint32_t)); + /* GPR1 = Start Snapshot */ + brw_load_register_mem64(brw, HSW_CS_GPR(1), obj->prim_count_bo, + I915_GEM_DOMAIN_INSTRUCTION, + I915_GEM_DOMAIN_INSTRUCTION, + START_OFFSET + i * sizeof(uint64_t)); + /* GPR2 = Ending Snapshot */ + brw_load_register_reg64(brw, GEN7_SO_NUM_PRIMS_WRITTEN(i), HSW_CS_GPR(2)); + + BEGIN_BATCH(9); + OUT_BATCH(HSW_MI_MATH | (9 - 2)); + /* GPR1 = GPR2 (End) - GPR1 (Start) */ + OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R2)); + OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1)); + OUT_BATCH(MI_MATH_ALU0(SUB)); + OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU)); + /* GPR0 = GPR0 (Tally) + GPR1 (Diff) */ + OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); + OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1)); + OUT_BATCH(MI_MATH_ALU0(ADD)); + OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU)); + ADVANCE_BATCH(); + + if (!finalize) { + /* Write back the new tally */ + brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0), + TALLY_OFFSET + i * sizeof(uint32_t)); + } else { + /* Convert the number of primitives to the number of vertices. */ + if (obj->primitive_mode == GL_LINES) { + /* Double R0 (R0 = R0 + R0) */ + BEGIN_BATCH(5); + OUT_BATCH(HSW_MI_MATH | (5 - 2)); + OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); + OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0)); + OUT_BATCH(MI_MATH_ALU0(ADD)); + OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU)); + ADVANCE_BATCH(); + } else if (obj->primitive_mode == GL_TRIANGLES) { + /* Triple R0 (R1 = R0 + R0, R0 = R0 + R1) */ + BEGIN_BATCH(9); + OUT_BATCH(HSW_MI_MATH | (9 - 2)); + OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); + OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0)); + OUT_BATCH(MI_MATH_ALU0(ADD)); + OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU)); + OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0)); + OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1)); + OUT_BATCH(MI_MATH_ALU0(ADD)); + OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU)); + ADVANCE_BATCH(); + } + /* Store it to the final result */ + brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0), + i * sizeof(uint32_t)); + } + } +} + +/** + * BeginTransformFeedback() driver hook. + */ +void +hsw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, + struct gl_transform_feedback_object *obj) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_transform_feedback_object *brw_obj = + (struct brw_transform_feedback_object *) obj; + + brw_obj->primitive_mode = mode; + + /* Reset the SO buffer offsets to 0. */ + if (brw->gen >= 8) { + brw_obj->zero_offsets = true; + } else { + BEGIN_BATCH(1 + 2 * BRW_MAX_XFB_STREAMS); + OUT_BATCH(MI_LOAD_REGISTER_IMM | (1 + 2 * BRW_MAX_XFB_STREAMS - 2)); + for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { + OUT_BATCH(GEN7_SO_WRITE_OFFSET(i)); + OUT_BATCH(0); + } + ADVANCE_BATCH(); + } + + /* Zero out the initial tallies */ + brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET, 0ull); + brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET + 8, 0ull); + + /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */ + save_prim_start_values(brw, brw_obj); +} + +/** + * PauseTransformFeedback() driver hook. + */ +void +hsw_pause_transform_feedback(struct gl_context *ctx, + struct gl_transform_feedback_object *obj) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_transform_feedback_object *brw_obj = + (struct brw_transform_feedback_object *) obj; + + if (brw->is_haswell) { + /* Save the SOL buffer offset register values. */ + for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { + BEGIN_BATCH(3); + OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); + OUT_BATCH(GEN7_SO_WRITE_OFFSET(i)); + OUT_RELOC(brw_obj->offset_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + i * sizeof(uint32_t)); + ADVANCE_BATCH(); + } + } + + /* Add any primitives written to our tally */ + tally_prims_written(brw, brw_obj, false); +} + +/** + * ResumeTransformFeedback() driver hook. + */ +void +hsw_resume_transform_feedback(struct gl_context *ctx, + struct gl_transform_feedback_object *obj) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_transform_feedback_object *brw_obj = + (struct brw_transform_feedback_object *) obj; + + if (brw->is_haswell) { + /* Reload the SOL buffer offset registers. */ + for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { + BEGIN_BATCH(3); + OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2)); + OUT_BATCH(GEN7_SO_WRITE_OFFSET(i)); + OUT_RELOC(brw_obj->offset_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + i * sizeof(uint32_t)); + ADVANCE_BATCH(); + } + } + + /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */ + save_prim_start_values(brw, brw_obj); +} + +/** + * EndTransformFeedback() driver hook. + */ +void +hsw_end_transform_feedback(struct gl_context *ctx, + struct gl_transform_feedback_object *obj) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_transform_feedback_object *brw_obj = + (struct brw_transform_feedback_object *) obj; + + /* Add any primitives written to our tally, convert it from the number + * of primitives written to the number of vertices written, and store + * it in the "final" location in the buffer which DrawTransformFeedback() + * will use as the vertex count. + */ + tally_prims_written(brw, brw_obj, true); +} |