diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu.h | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_gs.c | 106 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_gs.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_gs_emit.c | 92 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen6_gs_state.c | 46 |
6 files changed, 208 insertions, 46 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index d94923195d5..95039aa65bc 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1075,6 +1075,9 @@ enum brw_message_target { # define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16) # define GEN6_GS_ENABLE (1 << 15) +# define BRW_GS_EDGE_INDICATOR_0 (1 << 8) +# define BRW_GS_EDGE_INDICATOR_1 (1 << 9) + #define _3DSTATE_HS 0x781B /* GEN7+ */ #define _3DSTATE_TE 0x781C /* GEN7+ */ #define _3DSTATE_DS 0x781D /* GEN7+ */ diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index dcb1fc91678..596be02158c 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -650,6 +650,11 @@ static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt ) return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); } +static INLINE struct brw_reg get_element_d( struct brw_reg reg, GLuint elt ) +{ + return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_D), elt)); +} + static INLINE struct brw_reg brw_swizzle( struct brw_reg reg, GLuint x, diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index e72ff5e5a8f..69ffa19c40c 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -53,12 +53,6 @@ static void compile_gs_prog( struct brw_context *brw, void *mem_ctx; GLuint program_size; - /* Gen6: VF has already converted into polygon, and LINELOOP is - * converted to LINESTRIP at the beginning of the 3D pipeline. - */ - if (intel->gen >= 6) - return; - memset(&c, 0, sizeof(c)); c.key = *key; @@ -80,24 +74,60 @@ static void compile_gs_prog( struct brw_context *brw, */ brw_set_mask_control(&c.func, BRW_MASK_DISABLE); - - /* Note that primitives which don't require a GS program have - * already been weeded out by this stage: - */ - - switch (key->primitive) { - case _3DPRIM_QUADLIST: - brw_gs_quads( &c, key ); - break; - case _3DPRIM_QUADSTRIP: - brw_gs_quad_strip( &c, key ); - break; - case _3DPRIM_LINELOOP: - brw_gs_lines( &c ); - break; - default: - ralloc_free(mem_ctx); - return; + if (intel->gen >= 6) { + unsigned num_verts; + bool check_edge_flag; + /* On Sandybridge, we use the GS for implementing transform feedback + * (called "Stream Out" in the PRM). + */ + switch (key->primitive) { + case _3DPRIM_POINTLIST: + num_verts = 1; + check_edge_flag = false; + break; + case _3DPRIM_LINELIST: + case _3DPRIM_LINESTRIP: + case _3DPRIM_LINELOOP: + num_verts = 2; + check_edge_flag = false; + break; + case _3DPRIM_TRILIST: + case _3DPRIM_TRIFAN: + case _3DPRIM_TRISTRIP: + case _3DPRIM_RECTLIST: + num_verts = 3; + check_edge_flag = false; + break; + case _3DPRIM_QUADLIST: + case _3DPRIM_QUADSTRIP: + case _3DPRIM_POLYGON: + num_verts = 3; + check_edge_flag = true; + break; + default: + assert(!"Unexpected primitive type in Gen6 SOL program."); + return; + } + gen6_sol_program(&c, key, num_verts, check_edge_flag); + } else { + /* On Gen4-5, we use the GS to decompose certain types of primitives. + * Note that primitives which don't require a GS program have already + * been weeded out by now. + */ + switch (key->primitive) { + case _3DPRIM_QUADLIST: + brw_gs_quads( &c, key ); + break; + case _3DPRIM_QUADSTRIP: + brw_gs_quad_strip( &c, key ); + break; + case _3DPRIM_LINELOOP: + brw_gs_lines( &c ); + break; + default: + ralloc_free(mem_ctx); + return; + } } /* get the program @@ -148,11 +178,26 @@ static void populate_key( struct brw_context *brw, /* _NEW_TRANSFORM */ key->userclip_active = (ctx->Transform.ClipPlanesEnabled != 0); - key->need_gs_prog = (intel->gen >= 6) - ? 0 - : (brw->primitive == _3DPRIM_QUADLIST || - brw->primitive == _3DPRIM_QUADSTRIP || - brw->primitive == _3DPRIM_LINELOOP); + if (intel->gen >= 7) { + /* On Gen7 and later, we don't use GS (yet). */ + key->need_gs_prog = false; + } else if (intel->gen == 6) { + /* On Gen6, GS is used for transform feedback. */ + /* _NEW_TRANSFORM_FEEDBACK */ + key->need_gs_prog = ctx->TransformFeedback.CurrentObject->Active; + } else { + /* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP + * into simpler primitives. + */ + key->need_gs_prog = (brw->primitive == _3DPRIM_QUADLIST || + brw->primitive == _3DPRIM_QUADSTRIP || + brw->primitive == _3DPRIM_LINELOOP); + } + /* For testing, the environment variable INTEL_FORCE_GS can be used to + * force a GS program to be used, even if it's not necessary. + */ + if (getenv("INTEL_FORCE_GS")) + key->need_gs_prog = true; } /* Calculate interpolants for triangle and line rasterization. @@ -183,7 +228,8 @@ brw_upload_gs_prog(struct brw_context *brw) const struct brw_tracked_state brw_gs_prog = { .dirty = { .mesa = (_NEW_LIGHT | - _NEW_TRANSFORM), + _NEW_TRANSFORM | + _NEW_TRANSFORM_FEEDBACK), .brw = BRW_NEW_PRIMITIVE, .cache = CACHE_NEW_VS_PROG }, diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h index 93448a77f08..abcb0b2db59 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.h +++ b/src/mesa/drivers/dri/i965/brw_gs.h @@ -73,5 +73,7 @@ struct brw_gs_compile { void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key ); void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key ); void brw_gs_lines( struct brw_gs_compile *c ); +void gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key, + unsigned num_verts, bool check_edge_flag); #endif diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c index 9b1dfbfa97b..322f9bd81c1 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c @@ -101,6 +101,37 @@ static void brw_gs_overwrite_header_dw2(struct brw_gs_compile *c, } /** + * Overwrite DWORD 2 of c->reg.header with the primitive type from c->reg.R0. + * + * When the thread is spawned, GRF 0 contains the primitive type in bits 4:0 + * of DWORD 2. URB_WRITE messages need the primitive type in bits 6:2 of + * DWORD 2. So this function extracts the primitive type field, bitshifts it + * appropriately, and stores it in c->reg.header. + */ +static void brw_gs_overwrite_header_dw2_from_r0(struct brw_gs_compile *c) +{ + struct brw_compile *p = &c->func; + brw_AND(p, get_element_ud(c->reg.header, 2), get_element_ud(c->reg.R0, 2), + brw_imm_ud(0x1f)); + brw_SHL(p, get_element_ud(c->reg.header, 2), + get_element_ud(c->reg.header, 2), brw_imm_ud(2)); +} + +/** + * Apply an additive offset to DWORD 2 of c->reg.header. + * + * This is used to set/unset the "PrimStart" and "PrimEnd" flags appropriately + * for each vertex. + */ +static void brw_gs_offset_header_dw2(struct brw_gs_compile *c, int offset) +{ + struct brw_compile *p = &c->func; + brw_ADD(p, get_element_d(c->reg.header, 2), get_element_d(c->reg.header, 2), + brw_imm_d(offset)); +} + + +/** * Emit a vertex using the URB_WRITE message. Use the contents of * c->reg.header for the message header, and the registers starting at \c vert * for the vertex data. @@ -269,3 +300,64 @@ void brw_gs_lines( struct brw_gs_compile *c ) | URB_WRITE_PRIM_END)); brw_gs_emit_vue(c, c->reg.vertex[1], 1); } + +/** + * Generate the geometry shader program used on Gen6 to perform stream output + * (transform feedback). + */ +void +gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key, + unsigned num_verts, bool check_edge_flags) +{ + struct brw_compile *p = &c->func; + + brw_gs_alloc_regs(c, num_verts); + brw_gs_initialize_header(c); + + brw_gs_ff_sync(c, 1); + + brw_gs_overwrite_header_dw2_from_r0(c); + switch (num_verts) { + case 1: + brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START | URB_WRITE_PRIM_END); + brw_gs_emit_vue(c, c->reg.vertex[0], true); + break; + case 2: + brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START); + brw_gs_emit_vue(c, c->reg.vertex[0], false); + brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END - URB_WRITE_PRIM_START); + brw_gs_emit_vue(c, c->reg.vertex[1], true); + break; + case 3: + if (check_edge_flags) { + /* Only emit vertices 0 and 1 if this is the first triangle of the + * polygon. Otherwise they are redundant. + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + get_element_ud(c->reg.R0, 2), + brw_imm_ud(BRW_GS_EDGE_INDICATOR_0)); + brw_IF(p, BRW_EXECUTE_1); + } + brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START); + brw_gs_emit_vue(c, c->reg.vertex[0], false); + brw_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START); + brw_gs_emit_vue(c, c->reg.vertex[1], false); + if (check_edge_flags) { + brw_ENDIF(p); + /* Only emit vertex 2 in PRIM_END mode if this is the last triangle + * of the polygon. Otherwise leave the primitive incomplete because + * there are more polygon vertices coming. + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + get_element_ud(c->reg.R0, 2), + brw_imm_ud(BRW_GS_EDGE_INDICATOR_1)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + } + brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_gs_emit_vue(c, c->reg.vertex[2], true); + break; + } +} diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c index d29f0290727..42962a64d36 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c @@ -44,22 +44,36 @@ upload_gs_state(struct brw_context *brw) OUT_BATCH(0); ADVANCE_BATCH(); - // GS should never be used on Gen6. Disable it. - assert(!brw->gs.prog_active); - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); - OUT_BATCH(0); /* prog_bo */ - OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | - (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); - OUT_BATCH(0); /* scratch space base offset */ - OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | - (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) | - (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); - OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | - GEN6_GS_STATISTICS_ENABLE | - GEN6_GS_RENDERING_ENABLE); - OUT_BATCH(0); - ADVANCE_BATCH(); + if (brw->gs.prog_active) { + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); + OUT_BATCH(brw->gs.prog_offset); + OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE); + OUT_BATCH(0); /* no scratch space */ + OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | + (brw->gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT)); + OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) | + GEN6_GS_STATISTICS_ENABLE | + GEN6_GS_SO_STATISTICS_ENABLE | + GEN6_GS_RENDERING_ENABLE); + OUT_BATCH(GEN6_GS_ENABLE); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); + OUT_BATCH(0); /* prog_bo */ + OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | + (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); + OUT_BATCH(0); /* scratch space base offset */ + OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) | + (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) | + (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT)); + OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) | + GEN6_GS_STATISTICS_ENABLE | + GEN6_GS_RENDERING_ENABLE); + OUT_BATCH(0); + ADVANCE_BATCH(); + } } const struct brw_tracked_state gen6_gs_state = { |