diff options
author | Keith Whitwell <[email protected]> | 2009-11-10 18:07:11 -0800 |
---|---|---|
committer | Keith Whitwell <[email protected]> | 2009-11-11 18:51:58 -0800 |
commit | 2f54d02d205468a840b35a3554f2ad8ffc31ec9c (patch) | |
tree | ac443da5e09a40acf67fa83905f6494e82685207 /src/gallium/drivers/i965/brw_vs_emit.c | |
parent | 0c547d63c497f06c38f7a3c000e478bdcf2594b6 (diff) |
i965g: consult fs inputs when laying out vs output regs
The vertex shader now emits only the outputs that the fragment shader
reads as inputs, in the positions and order the fragment shader expects.
This means potentially regenerating the vertex shader to match
different fragment shaders' input layouts.
Diffstat (limited to 'src/gallium/drivers/i965/brw_vs_emit.c')
-rw-r--r-- | src/gallium/drivers/i965/brw_vs_emit.c | 116 |
1 files changed, 80 insertions, 36 deletions
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 26f0ec5a11a..933c9c4d63c 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -66,6 +66,38 @@ static void release_tmps( struct brw_vs_compile *c ) } +static boolean is_position_output( struct brw_vs_compile *c, + unsigned vs_output ) +{ + struct brw_vertex_shader *vs = c->vp; + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; + + return (semantic == TGSI_SEMANTIC_POSITION && + index == 0); +} + + +static boolean find_output_slot( struct brw_vs_compile *c, + unsigned vs_output, + unsigned *fs_input_slot ) +{ + struct brw_vertex_shader *vs = c->vp; + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; + unsigned i; + + for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { + if (c->key.fs_signature.input[i].semantic == semantic && + c->key.fs_signature.input[i].semantic_index == index) { + *fs_input_slot = i; + return TRUE; + } + } + + return FALSE; +} + /** * Preallocate GRF register before code emit. @@ -172,42 +204,50 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Allocate outputs. The non-position outputs go straight into message regs. 
*/ c->nr_outputs = c->prog_data.nr_outputs; - c->first_output = reg; - c->first_overflow_output = 0; if (c->chipset.is_igdng) mrf = 8; else mrf = 4; + + if (c->key.fs_signature.nr_inputs > BRW_MAX_MRF) { + c->overflow_grf_start = reg; + c->overflow_count = c->key.fs_signature.nr_inputs - BRW_MAX_MRF; + reg += c->overflow_count; + } + /* XXX: need to access vertex output semantics here: */ for (i = 0; i < c->prog_data.nr_outputs; i++) { - assert(i < Elements(c->regs[TGSI_FILE_OUTPUT])); + unsigned slot; - /* XXX: Hardwire position to zero: - */ - if (i == 0) { - c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; - } - /* XXX: disable psiz: + /* XXX: Put output position in slot zero always. Clipper, etc, + * need access to this reg. */ - else if (0) { - c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); + if (is_position_output(c, i)) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); /* copy to mrf 0 */ reg++; - mrf++; /* just a placeholder? XXX fix later stages & remove this */ } - else if (mrf < 16) { - c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); - mrf++; + else if (find_output_slot(c, i, &slot)) { + + if (0 /* is_psize_output(c, i) */ ) { + /* c->psize_out.grf = reg; */ + /* c->psize_out.mrf = i; */ + } + + /* The first (16-4) outputs can go straight into the message regs. 
+ */ + if (slot + mrf < BRW_MAX_MRF) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(slot + mrf); + } + else { + int grf = c->overflow_grf_start + slot - BRW_MAX_MRF; + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(grf, 0); + } } else { - /* too many vertex results to fit in MRF, use GRF for overflow */ - if (!c->first_overflow_output) - c->first_overflow_output = i; - c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; + c->regs[TGSI_FILE_OUTPUT][i] = brw_null_reg(); } } @@ -1072,6 +1112,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS]; struct brw_reg ndc; int eot; + int i; GLuint len_vertext_header = 2; if (c->key.copy_edgeflag) { @@ -1167,7 +1208,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) len_vertext_header = 2; } - eot = (c->first_overflow_output == 0); + eot = (c->overflow_count == 0); brw_urb_WRITE(p, brw_null_reg(), /* dest */ @@ -1182,19 +1223,22 @@ static void emit_vertex_write( struct brw_vs_compile *c) 0, /* urb destination offset */ BRW_URB_SWIZZLE_INTERLEAVE); - if (c->first_overflow_output > 0) { - /* Not all of the vertex outputs/results fit into the MRF. - * Move the overflowed attributes from the GRF to the MRF and - * issue another brw_urb_WRITE(). - */ + /* Not all of the vertex outputs/results fit into the MRF. + * Move the overflowed attributes from the GRF to the MRF and + * issue another brw_urb_WRITE(). + */ + for (i = 0; i < c->overflow_count; i += BRW_MAX_MRF) { + unsigned nr = MIN2(c->overflow_count - i, BRW_MAX_MRF); + GLuint j; + + eot = (i + nr >= c->overflow_count); + /* XXX I'm not 100% sure about which MRF regs to use here. Starting * at mrf[4] atm... 
*/ - GLuint i, mrf = 0; - for (i = c->first_overflow_output; i < c->prog_data.nr_outputs; i++) { - /* move from GRF to MRF */ - brw_MOV(p, brw_message_reg(4+mrf), c->regs[TGSI_FILE_OUTPUT][i]); - mrf++; + for (j = 0; j < nr; j++) { + brw_MOV(p, brw_message_reg(4+j), + brw_vec8_grf(c->overflow_grf_start + i + j, 0)); } brw_urb_WRITE(p, @@ -1203,11 +1247,11 @@ static void emit_vertex_write( struct brw_vs_compile *c) c->r0, /* src */ 0, /* allocate */ 1, /* used */ - mrf+1, /* msg len */ + nr+1, /* msg len */ 0, /* response len */ - 1, /* eot */ - 1, /* writes complete */ - BRW_MAX_MRF-1, /* urb destination offset */ + eot, /* eot */ + eot, /* writes complete */ + i-1, /* urb destination offset */ BRW_URB_SWIZZLE_INTERLEAVE); } } |