summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/drivers/i965/brw_context.h13
-rw-r--r--src/gallium/drivers/i965/brw_pipe_shader.c6
-rw-r--r--src/gallium/drivers/i965/brw_vs.c14
-rw-r--r--src/gallium/drivers/i965/brw_vs.h7
-rw-r--r--src/gallium/drivers/i965/brw_vs_emit.c116
5 files changed, 113 insertions, 43 deletions
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 4a975ecd7ec..31f3cf36855 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -161,11 +161,24 @@ struct brw_vertex_shader {
GLboolean use_const_buffer;
};
+struct brw_fs_signature {
+ GLuint nr_inputs;
+ struct {
+ GLuint semantic:5;
+ GLuint semantic_index:27;
+ } input[PIPE_MAX_SHADER_INPUTS];
+};
+
+#define brw_fs_signature_size(s) (offsetof(struct brw_fs_signature, input) + \
+ ((s)->nr_inputs * sizeof (s)->input[0]))
+
struct brw_fragment_shader {
const struct tgsi_token *tokens;
struct tgsi_shader_info info;
+ struct brw_fs_signature signature;
+
unsigned iz_lookup;
//unsigned wm_lookup;
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index 44f9ad6f9cd..7febf9e0c2f 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -96,6 +96,12 @@ static void *brw_create_fs_state( struct pipe_context *pipe,
tgsi_scan_shader(fs->tokens, &fs->info);
+ fs->signature.nr_inputs = fs->info.num_inputs;
+ for (i = 0; i < fs->info.num_inputs; i++) {
+ fs->signature.input[i].semantic = fs->info.input_semantic_name[i];
+ fs->signature.input[i].semantic_index = fs->info.input_semantic_index[i];
+ }
+
for (i = 0; i < fs->info.num_inputs; i++)
if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION)
fs->uses_depth = 1;
diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
index 966940ceacb..05a62ed9745 100644
--- a/src/gallium/drivers/i965/brw_vs.c
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -90,22 +90,24 @@ static enum pipe_error brw_upload_vs_prog(struct brw_context *brw)
{
struct brw_vs_prog_key key;
struct brw_vertex_shader *vp = brw->curr.vertex_shader;
+ struct brw_fragment_shader *fs = brw->curr.fragment_shader;
enum pipe_error ret;
memset(&key, 0, sizeof(key));
- /* Just upload the program verbatim for now. Always send it all
- * the inputs it asks for, whether they are varying or not.
- */
key.program_string_id = vp->id;
key.nr_userclip = brw->curr.ucp.nr;
key.copy_edgeflag = (brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL ||
brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL);
+ memcpy(&key.fs_signature, &fs->signature,
+ brw_fs_signature_size(&fs->signature));
+
+
/* Make an early check for the key.
*/
if (brw_search_cache(&brw->cache, BRW_VS_PROG,
- &key, sizeof(key),
+ &key, brw_vs_prog_key_size(&key),
NULL, 0,
&brw->vs.prog_data,
&brw->vs.prog_bo))
@@ -123,7 +125,9 @@ static enum pipe_error brw_upload_vs_prog(struct brw_context *brw)
*/
const struct brw_tracked_state brw_vs_prog = {
.dirty = {
- .mesa = PIPE_NEW_CLIP | PIPE_NEW_RAST,
+ .mesa = (PIPE_NEW_CLIP |
+ PIPE_NEW_RAST |
+ PIPE_NEW_FRAGMENT_SHADER),
.brw = BRW_NEW_VERTEX_PROGRAM,
.cache = 0
},
diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h
index b4e450d89bf..3d1598d02b9 100644
--- a/src/gallium/drivers/i965/brw_vs.h
+++ b/src/gallium/drivers/i965/brw_vs.h
@@ -43,8 +43,11 @@ struct brw_vs_prog_key {
GLuint nr_userclip:4;
GLuint copy_edgeflag:1;
GLuint pad:26;
+ struct brw_fs_signature fs_signature;
};
+#define brw_vs_prog_key_size(s) (offsetof(struct brw_vs_prog_key, fs_signature) + \
+ brw_fs_signature_size(&(s)->fs_signature))
#define MAX_IF_DEPTH 32
@@ -65,8 +68,8 @@ struct brw_vs_compile {
GLboolean copy_edgeflag;
- GLuint first_output;
- GLuint first_overflow_output; /**< VERT_ATTRIB_x */
+ GLuint overflow_grf_start;
+ GLuint overflow_count;
GLuint first_tmp;
GLuint last_tmp;
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 26f0ec5a11a..933c9c4d63c 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -66,6 +66,38 @@ static void release_tmps( struct brw_vs_compile *c )
}
+static boolean is_position_output( struct brw_vs_compile *c,
+ unsigned vs_output )
+{
+ struct brw_vertex_shader *vs = c->vp;
+ unsigned semantic = vs->info.output_semantic_name[vs_output];
+ unsigned index = vs->info.output_semantic_index[vs_output];
+
+ return (semantic == TGSI_SEMANTIC_POSITION &&
+ index == 0);
+}
+
+
+static boolean find_output_slot( struct brw_vs_compile *c,
+ unsigned vs_output,
+ unsigned *fs_input_slot )
+{
+ struct brw_vertex_shader *vs = c->vp;
+ unsigned semantic = vs->info.output_semantic_name[vs_output];
+ unsigned index = vs->info.output_semantic_index[vs_output];
+ unsigned i;
+
+ for (i = 0; i < c->key.fs_signature.nr_inputs; i++) {
+ if (c->key.fs_signature.input[i].semantic == semantic &&
+ c->key.fs_signature.input[i].semantic_index == index) {
+ *fs_input_slot = i;
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
/**
* Preallocate GRF register before code emit.
@@ -172,42 +204,50 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
/* Allocate outputs. The non-position outputs go straight into message regs.
*/
c->nr_outputs = c->prog_data.nr_outputs;
- c->first_output = reg;
- c->first_overflow_output = 0;
if (c->chipset.is_igdng)
mrf = 8;
else
mrf = 4;
+
+ if (c->key.fs_signature.nr_inputs > BRW_MAX_MRF) {
+ c->overflow_grf_start = reg;
+ c->overflow_count = c->key.fs_signature.nr_inputs - BRW_MAX_MRF;
+ reg += c->overflow_count;
+ }
+
/* XXX: need to access vertex output semantics here:
*/
for (i = 0; i < c->prog_data.nr_outputs; i++) {
- assert(i < Elements(c->regs[TGSI_FILE_OUTPUT]));
+ unsigned slot;
- /* XXX: Hardwire position to zero:
- */
- if (i == 0) {
- c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
- reg++;
- }
- /* XXX: disable psiz:
+ /* XXX: Put output position in slot zero always. Clipper, etc,
+ * need access to this reg.
*/
- else if (0) {
- c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
+ if (is_position_output(c, i)) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); /* copy to mrf 0 */
reg++;
- mrf++; /* just a placeholder? XXX fix later stages & remove this */
}
- else if (mrf < 16) {
- c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
- mrf++;
+ else if (find_output_slot(c, i, &slot)) {
+
+ if (0 /* is_psize_output(c, i) */ ) {
+ /* c->psize_out.grf = reg; */
+ /* c->psize_out.mrf = i; */
+ }
+
+ /* The first (16-4) outputs can go straight into the message regs.
+ */
+ if (slot + mrf < BRW_MAX_MRF) {
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(slot + mrf);
+ }
+ else {
+ int grf = c->overflow_grf_start + slot - BRW_MAX_MRF;
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(grf, 0);
+ }
}
else {
- /* too many vertex results to fit in MRF, use GRF for overflow */
- if (!c->first_overflow_output)
- c->first_overflow_output = i;
- c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
- reg++;
+ c->regs[TGSI_FILE_OUTPUT][i] = brw_null_reg();
}
}
@@ -1072,6 +1112,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS];
struct brw_reg ndc;
int eot;
+ int i;
GLuint len_vertext_header = 2;
if (c->key.copy_edgeflag) {
@@ -1167,7 +1208,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
len_vertext_header = 2;
}
- eot = (c->first_overflow_output == 0);
+ eot = (c->overflow_count == 0);
brw_urb_WRITE(p,
brw_null_reg(), /* dest */
@@ -1182,19 +1223,22 @@ static void emit_vertex_write( struct brw_vs_compile *c)
0, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
- if (c->first_overflow_output > 0) {
- /* Not all of the vertex outputs/results fit into the MRF.
- * Move the overflowed attributes from the GRF to the MRF and
- * issue another brw_urb_WRITE().
- */
+ /* Not all of the vertex outputs/results fit into the MRF.
+ * Move the overflowed attributes from the GRF to the MRF and
+ * issue another brw_urb_WRITE().
+ */
+ for (i = 0; i < c->overflow_count; i += BRW_MAX_MRF) {
+ unsigned nr = MIN2(c->overflow_count - i, BRW_MAX_MRF);
+ GLuint j;
+
+ eot = (i + nr >= c->overflow_count);
+
/* XXX I'm not 100% sure about which MRF regs to use here. Starting
* at mrf[4] atm...
*/
- GLuint i, mrf = 0;
- for (i = c->first_overflow_output; i < c->prog_data.nr_outputs; i++) {
- /* move from GRF to MRF */
- brw_MOV(p, brw_message_reg(4+mrf), c->regs[TGSI_FILE_OUTPUT][i]);
- mrf++;
+ for (j = 0; j < nr; j++) {
+ brw_MOV(p, brw_message_reg(4+j),
+ brw_vec8_grf(c->overflow_grf_start + i + j, 0));
}
brw_urb_WRITE(p,
@@ -1203,11 +1247,11 @@ static void emit_vertex_write( struct brw_vs_compile *c)
c->r0, /* src */
0, /* allocate */
1, /* used */
- mrf+1, /* msg len */
+ nr+1, /* msg len */
0, /* response len */
- 1, /* eot */
- 1, /* writes complete */
- BRW_MAX_MRF-1, /* urb destination offset */
+ eot, /* eot */
+ eot, /* writes complete */
+ i-1, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
}
}