diff options
-rw-r--r-- | src/gallium/drivers/i965/brw_context.h | 13 | ||||
-rw-r--r-- | src/gallium/drivers/i965/brw_pipe_shader.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/i965/brw_vs.c | 14 | ||||
-rw-r--r-- | src/gallium/drivers/i965/brw_vs.h | 7 | ||||
-rw-r--r-- | src/gallium/drivers/i965/brw_vs_emit.c | 116 |
5 files changed, 113 insertions, 43 deletions
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 4a975ecd7ec..31f3cf36855 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -161,11 +161,24 @@ struct brw_vertex_shader { GLboolean use_const_buffer; }; +struct brw_fs_signature { + GLuint nr_inputs; + struct { + GLuint semantic:5; + GLuint semantic_index:27; + } input[PIPE_MAX_SHADER_INPUTS]; +}; + +#define brw_fs_signature_size(s) (offsetof(struct brw_fs_signature, input) + \ + ((s)->nr_inputs * sizeof (s)->input[0])) + struct brw_fragment_shader { const struct tgsi_token *tokens; struct tgsi_shader_info info; + struct brw_fs_signature signature; + unsigned iz_lookup; //unsigned wm_lookup; diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 44f9ad6f9cd..7febf9e0c2f 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -96,6 +96,12 @@ static void *brw_create_fs_state( struct pipe_context *pipe, tgsi_scan_shader(fs->tokens, &fs->info); + fs->signature.nr_inputs = fs->info.num_inputs; + for (i = 0; i < fs->info.num_inputs; i++) { + fs->signature.input[i].semantic = fs->info.input_semantic_name[i]; + fs->signature.input[i].semantic_index = fs->info.input_semantic_index[i]; + } + for (i = 0; i < fs->info.num_inputs; i++) if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION) fs->uses_depth = 1; diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c index 966940ceacb..05a62ed9745 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -90,22 +90,24 @@ static enum pipe_error brw_upload_vs_prog(struct brw_context *brw) { struct brw_vs_prog_key key; struct brw_vertex_shader *vp = brw->curr.vertex_shader; + struct brw_fragment_shader *fs = brw->curr.fragment_shader; enum pipe_error ret; memset(&key, 0, sizeof(key)); - /* Just upload the program verbatim for now. Always send it all - * the inputs it asks for, whether they are varying or not. - */ key.program_string_id = vp->id; key.nr_userclip = brw->curr.ucp.nr; key.copy_edgeflag = (brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL || brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL); + memcpy(&key.fs_signature, &fs->signature, + brw_fs_signature_size(&fs->signature)); + + /* Make an early check for the key. */ if (brw_search_cache(&brw->cache, BRW_VS_PROG, - &key, sizeof(key), + &key, brw_vs_prog_key_size(&key), NULL, 0, &brw->vs.prog_data, &brw->vs.prog_bo)) @@ -123,7 +125,9 @@ static enum pipe_error brw_upload_vs_prog(struct brw_context *brw) */ const struct brw_tracked_state brw_vs_prog = { .dirty = { - .mesa = PIPE_NEW_CLIP | PIPE_NEW_RAST, + .mesa = (PIPE_NEW_CLIP | + PIPE_NEW_RAST | + PIPE_NEW_FRAGMENT_SHADER), .brw = BRW_NEW_VERTEX_PROGRAM, .cache = 0 }, diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h index b4e450d89bf..3d1598d02b9 100644 --- a/src/gallium/drivers/i965/brw_vs.h +++ b/src/gallium/drivers/i965/brw_vs.h @@ -43,8 +43,11 @@ struct brw_vs_prog_key { GLuint nr_userclip:4; GLuint copy_edgeflag:1; GLuint pad:26; + struct brw_fs_signature fs_signature; }; +#define brw_vs_prog_key_size(s) (offsetof(struct brw_vs_prog_key, fs_signature) + \ + brw_fs_signature_size(&(s)->fs_signature)) #define MAX_IF_DEPTH 32 @@ -65,8 +68,8 @@ struct brw_vs_compile { GLboolean copy_edgeflag; - GLuint first_output; - GLuint first_overflow_output; /**< VERT_ATTRIB_x */ + GLuint overflow_grf_start; + GLuint overflow_count; GLuint first_tmp; GLuint last_tmp; diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 26f0ec5a11a..933c9c4d63c 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -66,6 +66,38 @@ static void release_tmps( struct brw_vs_compile *c ) } +static boolean is_position_output( struct brw_vs_compile *c, + unsigned vs_output ) +{ + struct brw_vertex_shader *vs = c->vp; + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; + + return (semantic == TGSI_SEMANTIC_POSITION && + index == 0); +} + + +static boolean find_output_slot( struct brw_vs_compile *c, + unsigned vs_output, + unsigned *fs_input_slot ) +{ + struct brw_vertex_shader *vs = c->vp; + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; + unsigned i; + + for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { + if (c->key.fs_signature.input[i].semantic == semantic && + c->key.fs_signature.input[i].semantic_index == index) { + *fs_input_slot = i; + return TRUE; + } + } + + return FALSE; +} + /** * Preallocate GRF register before code emit. @@ -172,42 +204,50 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Allocate outputs. The non-position outputs go straight into message regs. */ c->nr_outputs = c->prog_data.nr_outputs; - c->first_output = reg; - c->first_overflow_output = 0; if (c->chipset.is_igdng) mrf = 8; else mrf = 4; + + if (c->key.fs_signature.nr_inputs > BRW_MAX_MRF) { + c->overflow_grf_start = reg; + c->overflow_count = c->key.fs_signature.nr_inputs - BRW_MAX_MRF; + reg += c->overflow_count; + } + /* XXX: need to access vertex output semantics here: */ for (i = 0; i < c->prog_data.nr_outputs; i++) { - assert(i < Elements(c->regs[TGSI_FILE_OUTPUT])); + unsigned slot; - /* XXX: Hardwire position to zero: - */ - if (i == 0) { - c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; - } - /* XXX: disable psiz: + /* XXX: Put output position in slot zero always. Clipper, etc, + * need access to this reg. */ - else if (0) { - c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); + if (is_position_output(c, i)) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); /* copy to mrf 0 */ reg++; - mrf++; /* just a placeholder? XXX fix later stages & remove this */ } - else if (mrf < 16) { - c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); - mrf++; + else if (find_output_slot(c, i, &slot)) { + + if (0 /* is_psize_output(c, i) */ ) { + /* c->psize_out.grf = reg; */ + /* c->psize_out.mrf = i; */ + } + + /* The first (16-4) outputs can go straight into the message regs. + */ + if (slot + mrf < BRW_MAX_MRF) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(slot + mrf); + } + else { + int grf = c->overflow_grf_start + slot - BRW_MAX_MRF; + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(grf, 0); + } } else { - /* too many vertex results to fit in MRF, use GRF for overflow */ - if (!c->first_overflow_output) - c->first_overflow_output = i; - c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; + c->regs[TGSI_FILE_OUTPUT][i] = brw_null_reg(); } } @@ -1072,6 +1112,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS]; struct brw_reg ndc; int eot; + int i; GLuint len_vertext_header = 2; if (c->key.copy_edgeflag) { @@ -1167,7 +1208,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) len_vertext_header = 2; } - eot = (c->first_overflow_output == 0); + eot = (c->overflow_count == 0); brw_urb_WRITE(p, brw_null_reg(), /* dest */ @@ -1182,19 +1223,22 @@ static void emit_vertex_write( struct brw_vs_compile *c) 0, /* urb destination offset */ BRW_URB_SWIZZLE_INTERLEAVE); - if (c->first_overflow_output > 0) { - /* Not all of the vertex outputs/results fit into the MRF. - * Move the overflowed attributes from the GRF to the MRF and - * issue another brw_urb_WRITE(). - */ + /* Not all of the vertex outputs/results fit into the MRF. + * Move the overflowed attributes from the GRF to the MRF and + * issue another brw_urb_WRITE(). + */ + for (i = 0; i < c->overflow_count; i += BRW_MAX_MRF) { + unsigned nr = MIN2(c->overflow_count - i, BRW_MAX_MRF); + GLuint j; + + eot = (i + nr >= c->overflow_count); + /* XXX I'm not 100% sure about which MRF regs to use here. Starting * at mrf[4] atm... */ - GLuint i, mrf = 0; - for (i = c->first_overflow_output; i < c->prog_data.nr_outputs; i++) { - /* move from GRF to MRF */ - brw_MOV(p, brw_message_reg(4+mrf), c->regs[TGSI_FILE_OUTPUT][i]); - mrf++; + for (j = 0; j < nr; j++) { + brw_MOV(p, brw_message_reg(4+j), + brw_vec8_grf(c->overflow_grf_start + i + j, 0)); } brw_urb_WRITE(p, @@ -1203,11 +1247,11 @@ static void emit_vertex_write( struct brw_vs_compile *c) c->r0, /* src */ 0, /* allocate */ 1, /* used */ - mrf+1, /* msg len */ + nr+1, /* msg len */ 0, /* response len */ - 1, /* eot */ - 1, /* writes complete */ - BRW_MAX_MRF-1, /* urb destination offset */ + eot, /* eot */ + eot, /* writes complete */ + i-1, /* urb destination offset */ BRW_URB_SWIZZLE_INTERLEAVE); } } |