summaryrefslogtreecommitdiffstats
path: root/src/mesa
diff options
context:
space:
mode:
authorPaul Berry <[email protected]>2011-08-23 11:07:56 -0700
committerPaul Berry <[email protected]>2011-09-06 11:04:18 -0700
commite604f98f580b74dd6c597ef492706ce74697443e (patch)
tree0ce2f9a9741da33802095a2991d1055136a60685 /src/mesa
parentd1435a49e9765ab4e988dd8b65a5599da34f3512 (diff)
i965: new VS: use the VUE map to write out vertex attributes.
Previously, the new VS backend used two functions, emit_vue_header_gen6() and emit_vue_header_gen4() to emit the fixed parts of the VUE, and then a pair of carefully-constructed loops to emit the rest of the VUE, leaving out the parts that were already emitted as part of the header. This patch changes the new VS backend to use the VUE map to emit the entire VUE. Reviewed-by: Eric Anholt <[email protected]>
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp165
2 files changed, 55 insertions, 113 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 01313ec192d..360aa60ce4e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -466,8 +466,7 @@ public:
void emit_ndc_computation();
void emit_psiz_and_flags(struct brw_reg reg);
void emit_clip_distances(struct brw_reg reg, int offset);
- int emit_vue_header_gen6(int header_mrf);
- int emit_vue_header_gen4(int header_mrf);
+ void emit_urb_slot(int mrf, int vert_result);
void emit_urb_writes(void);
src_reg get_scratch_offset(vec4_instruction *inst,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 8cc52c1d181..478c1c8e556 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1807,83 +1807,53 @@ vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
}
}
-int
-vec4_visitor::emit_vue_header_gen4(int header_mrf)
+void
+vec4_visitor::emit_urb_slot(int mrf, int vert_result)
{
- emit_ndc_computation();
-
- emit_psiz_and_flags(brw_message_reg(header_mrf++));
-
- if (intel->gen == 5) {
- /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
- * dword 0-3 (m1) of the header is indices, point width, clip flags.
- * dword 4-7 (m2) is the ndc position (set above)
- * dword 8-11 (m3) of the vertex header is the 4D space position
- * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
- * m6 is a pad so that the vertex element data is aligned
- * m7 is the first vertex data we fill.
- */
- current_annotation = "NDC";
- emit(MOV(brw_message_reg(header_mrf++),
- src_reg(output_reg[BRW_VERT_RESULT_NDC])));
-
- current_annotation = "gl_Position";
- emit(MOV(brw_message_reg(header_mrf++),
- src_reg(output_reg[VERT_RESULT_HPOS])));
-
- /* user clip distance. */
- emit_clip_distances(brw_message_reg(header_mrf++), 0);
- emit_clip_distances(brw_message_reg(header_mrf++), 4);
+ struct brw_reg reg = brw_message_reg(mrf);
- /* Pad so that vertex element data is aligned. */
- header_mrf++;
- } else {
- /* There are 8 dwords in VUE header pre-Ironlake:
- * dword 0-3 (m1) is indices, point width, clip flags.
- * dword 4-7 (m2) is ndc position (set above)
- *
- * dword 8-11 (m3) is the first vertex data.
- */
+ switch (vert_result) {
+ case VERT_RESULT_PSIZ:
+ /* PSIZ is always in slot 0, and is coupled with other flags. */
+ current_annotation = "indices, point width, clip flags";
+ emit_psiz_and_flags(reg);
+ break;
+ case BRW_VERT_RESULT_NDC:
current_annotation = "NDC";
- emit(MOV(brw_message_reg(header_mrf++),
- src_reg(output_reg[BRW_VERT_RESULT_NDC])));
-
+ emit(MOV(reg, src_reg(output_reg[BRW_VERT_RESULT_NDC])));
+ break;
+ case BRW_VERT_RESULT_HPOS_DUPLICATE:
+ case VERT_RESULT_HPOS:
current_annotation = "gl_Position";
- emit(MOV(brw_message_reg(header_mrf++),
- src_reg(output_reg[VERT_RESULT_HPOS])));
+ emit(MOV(reg, src_reg(output_reg[VERT_RESULT_HPOS])));
+ break;
+ case BRW_VERT_RESULT_CLIP0:
+ current_annotation = "user clip distances";
+ emit_clip_distances(reg, 0);
+ break;
+ case BRW_VERT_RESULT_CLIP1:
+ current_annotation = "user clip distances";
+ emit_clip_distances(reg, 4);
+ break;
+ case BRW_VERT_RESULT_PAD:
+ /* No need to write to this slot */
+ break;
+ default: {
+ assert (vert_result < VERT_RESULT_MAX);
+ current_annotation = NULL;
+ /* Copy the register, saturating if necessary */
+ vec4_instruction *inst = emit(MOV(reg,
+ src_reg(output_reg[vert_result])));
+ if ((vert_result == VERT_RESULT_COL0 ||
+ vert_result == VERT_RESULT_COL1 ||
+ vert_result == VERT_RESULT_BFC0 ||
+ vert_result == VERT_RESULT_BFC1) &&
+ c->key.clamp_vertex_color) {
+ inst->saturate = true;
+ }
}
-
- return header_mrf;
-}
-
-int
-vec4_visitor::emit_vue_header_gen6(int header_mrf)
-{
- /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
- * dword 0-3 (m2) of the header is indices, point width, clip flags.
- * dword 4-7 (m3) is the 4D space position
- * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
- * enabled.
- *
- * m4 or 6 is the first vertex element data we fill.
- */
-
- current_annotation = "indices, point width, clip flags";
- emit_psiz_and_flags(brw_message_reg(header_mrf++));
-
- current_annotation = "gl_Position";
- emit(MOV(brw_message_reg(header_mrf++),
- src_reg(output_reg[VERT_RESULT_HPOS])));
-
- current_annotation = "user clip distances";
- if (c->key.nr_userclip) {
- emit_clip_distances(brw_message_reg(header_mrf++), 0);
- emit_clip_distances(brw_message_reg(header_mrf++), 4);
+ break;
}
-
- current_annotation = NULL;
-
- return header_mrf;
}
static int
@@ -1922,7 +1892,6 @@ vec4_visitor::emit_urb_writes()
int base_mrf = 1;
int mrf = base_mrf;
int urb_entry_size;
- uint64_t outputs_remaining = c->prog_data.outputs_written;
/* In the process of generating our URB write message contents, we
* may need to unspill a register or load from an array. Those
* reads would use MRFs 14-15.
@@ -1931,45 +1900,22 @@ vec4_visitor::emit_urb_writes()
/* FINISHME: edgeflag */
+ brw_compute_vue_map(&c->vue_map, intel, c->key.nr_userclip,
+ c->key.two_side_color, c->prog_data.outputs_written);
+
/* First mrf is the g0-based message header containing URB handles and such,
* which is implied in VS_OPCODE_URB_WRITE.
*/
mrf++;
- if (intel->gen >= 6) {
- mrf = emit_vue_header_gen6(mrf);
- } else {
- mrf = emit_vue_header_gen4(mrf);
+ if (intel->gen < 6) {
+ emit_ndc_computation();
}
/* Set up the VUE data for the first URB write */
- int attr;
- for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
- if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
- continue;
-
- outputs_remaining &= ~BITFIELD64_BIT(attr);
-
- /* This is set up in the VUE header. */
- if (attr == VERT_RESULT_HPOS)
- continue;
-
- /* This is loaded into the VUE header, and thus doesn't occupy
- * an attribute slot.
- */
- if (attr == VERT_RESULT_PSIZ)
- continue;
-
- vec4_instruction *inst = emit(MOV(brw_message_reg(mrf++),
- src_reg(output_reg[attr])));
-
- if ((attr == VERT_RESULT_COL0 ||
- attr == VERT_RESULT_COL1 ||
- attr == VERT_RESULT_BFC0 ||
- attr == VERT_RESULT_BFC1) &&
- c->key.clamp_vertex_color) {
- inst->saturate = true;
- }
+ int slot;
+ for (slot = 0; slot < c->vue_map.num_slots; ++slot) {
+ emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);
/* If this was MRF 15, we can't fit anything more into this URB
* WRITE. Note that base_mrf of 1 means that MRF 15 is an
@@ -1977,7 +1923,7 @@ vec4_visitor::emit_urb_writes()
* gen6's requirements for length alignment.
*/
if (mrf > max_usable_mrf) {
- attr++;
+ slot++;
break;
}
}
@@ -1985,21 +1931,18 @@ vec4_visitor::emit_urb_writes()
vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
inst->base_mrf = base_mrf;
inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
- inst->eot = !outputs_remaining;
+ inst->eot = (slot >= c->vue_map.num_slots);
urb_entry_size = mrf - base_mrf;
/* Optional second URB write */
- if (outputs_remaining) {
+ if (!inst->eot) {
mrf = base_mrf + 1;
- for (; attr < VERT_RESULT_MAX; attr++) {
- if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
- continue;
-
+ for (; slot < c->vue_map.num_slots; ++slot) {
assert(mrf < max_usable_mrf);
- emit(MOV(brw_message_reg(mrf++), src_reg(output_reg[attr])));
+ emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);
}
inst = emit(VS_OPCODE_URB_WRITE);