3 files changed, 59 insertions, 19 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 3c8aed53cbe..87401351d45 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -479,11 +479,14 @@ public:
    void dump_instructions();
 
 protected:
+   void emit_vertex();
    virtual dst_reg *make_reg_for_system_value(ir_variable *ir) = 0;
    virtual int setup_attributes(int payload_reg) = 0;
    virtual void emit_prolog() = 0;
    virtual void emit_program_code() = 0;
    virtual void emit_thread_end() = 0;
+   virtual void emit_urb_write_header(int mrf) = 0;
+   virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
 };
 
 class vec4_vs_visitor : public vec4_visitor
@@ -502,6 +505,8 @@ protected:
    virtual void emit_prolog();
    virtual void emit_program_code();
    virtual void emit_thread_end();
+   virtual void emit_urb_write_header(int mrf);
+   virtual vec4_instruction *emit_urb_write_opcode(bool complete);
 
 private:
    void setup_vp_regs();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 40e16156005..b9d1543c975 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2605,14 +2605,38 @@ align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
    return mlen;
 }
 
+void
+vec4_vs_visitor::emit_urb_write_header(int mrf)
+{
+   /* The vertex shader emits nothing here: VS_OPCODE_URB_WRITE itself
+    * performs the implied write to this MRF.
+    */
+   static_cast<void>(mrf);
+}
+
+vec4_instruction *
+vec4_vs_visitor::emit_urb_write_opcode(bool complete)
+{
+   /* In the VS the completing URB write also ends the thread, so when
+    * DEBUG_SHADER_TIME is set emit_shader_time_end() is called first.
+    */
+   if (complete && (INTEL_DEBUG & DEBUG_SHADER_TIME))
+      emit_shader_time_end();
+
+   vec4_instruction *urb_write = emit(VS_OPCODE_URB_WRITE);
+   urb_write->eot = complete;
+
+   return urb_write;
+}
+
 /**
- * Generates the VUE payload plus the 1 or 2 URB write instructions to
- * complete the VS thread.
+ * Generates the VUE payload plus the necessary URB write instructions to
+ * output it.
  *
  * The VUE layout is documented in Volume 2a.
  */
 void
-vec4_vs_visitor::emit_thread_end()
+vec4_visitor::emit_vertex()
 {
    /* MRF 0 is reserved for the debugger, so start with message header
     * in MRF 1.
@@ -2631,10 +2655,10 @@ vec4_vs_visitor::emit_thread_end()
     */
    assert ((max_usable_mrf - base_mrf) % 2 == 0);
 
-   /* First mrf is the g0-based message header containing URB handles and such,
-    * which is implied in VS_OPCODE_URB_WRITE.
+   /* First mrf is the g0-based message header containing URB handles and
+    * such.
     */
-   mrf++;
+   emit_urb_write_header(mrf++);
 
    if (intel->gen < 6) {
       emit_ndc_computation();
@@ -2654,19 +2678,14 @@ vec4_vs_visitor::emit_thread_end()
       }
    }
 
-   bool eot = slot >= prog_data->vue_map.num_slots;
-   if (eot) {
-      if (INTEL_DEBUG & DEBUG_SHADER_TIME)
-         emit_shader_time_end();
-   }
+   bool complete = slot >= prog_data->vue_map.num_slots;
    current_annotation = "URB write";
-   vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
+   vec4_instruction *inst = emit_urb_write_opcode(complete);
    inst->base_mrf = base_mrf;
    inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
-   inst->eot = eot;
 
    /* Optional second URB write */
-   if (!inst->eot) {
+   if (!complete) {
       mrf = base_mrf + 1;
 
       for (; slot < prog_data->vue_map.num_slots; ++slot) {
@@ -2675,14 +2694,10 @@ vec4_vs_visitor::emit_thread_end()
          emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
       }
 
-      if (INTEL_DEBUG & DEBUG_SHADER_TIME)
-         emit_shader_time_end();
-
       current_annotation = "URB write";
-      inst = emit(VS_OPCODE_URB_WRITE);
+      inst = emit_urb_write_opcode(true /* complete */);
       inst->base_mrf = base_mrf;
       inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
-      inst->eot = true;
       /* URB destination offset.  In the previous write, we got MRFs
        * 2-13 minus the one header MRF, so 12 regs.  URB offset is in
        * URB row increments, and each of our MRFs is half of one of
@@ -2692,6 +2707,16 @@ vec4_vs_visitor::emit_thread_end()
    }
 }
 
+void
+vec4_vs_visitor::emit_thread_end()
+{
+   /* A VS thread always finishes by emitting exactly one vertex; the eot
+    * flag on the SEND instruction is set for us inside
+    * emit_urb_write_opcode().
+    */
+   this->emit_vertex();
+}
+
 src_reg
 vec4_visitor::get_scratch_offset(vec4_instruction *inst,
 				 src_reg *reladdr, int reg_offset)
diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
index e2cdeeab2c5..5c00a739f3b 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
@@ -79,6 +79,22 @@ protected:
    {
       assert(!"Not reached");
    }
+
+   /* Stub: this test does not expect the URB header hook to be called. */
+   virtual void emit_urb_write_header(int mrf)
+   {
+      assert(!"Not reached");
+   }
+
+   /* Stub: this test does not expect the URB write hook to be called. */
+   virtual vec4_instruction *emit_urb_write_opcode(bool complete)
+   {
+      assert(!"Not reached");
+      /* Still return a value: with NDEBUG the assert vanishes, and flowing
+       * off the end of a non-void function is undefined behaviour.
+       */
+      return NULL;
+   }
 };