9 files changed, 170 insertions, 109 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 33245d701e0..8efd6796a97 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2193,7 +2193,8 @@ void brw_SAMPLE(struct brw_compile *p,
    struct brw_context *brw = p->brw;
    struct brw_instruction *insn;
 
-   gen6_resolve_implied_move(p, &src0, msg_reg_nr);
+   if (msg_reg_nr != -1)
+      gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
    insn = next_insn(p, BRW_OPCODE_SEND);
    insn->header.predicate_control = 0; /* XXX */
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index da31b3e5eb9..e5d6e4b281e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -355,7 +355,8 @@ fs_inst::is_send_from_grf()
    return (opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7 ||
            opcode == SHADER_OPCODE_SHADER_TIME_ADD ||
            (opcode == FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD &&
-            src[1].file == GRF));
+            src[1].file == GRF) ||
+           (is_tex() && src[0].file == GRF));
 }
 
 bool
@@ -436,6 +437,14 @@ fs_reg::equals(const fs_reg &r) const
            imm.u == r.imm.u);
 }
 
+fs_reg
+fs_reg::retype(uint32_t type)
+{
+   fs_reg retyped = *this; /* work on a copy; *this stays unmodified */
+   retyped.type = type;
+   return retyped;
+}
+
 bool
 fs_reg::is_zero() const
 {
@@ -698,6 +707,18 @@ fs_inst::is_partial_write()
            this->force_sechalf);
 }
 
+int
+fs_inst::regs_read(fs_visitor *v, int arg)
+{
+   /* Anything but a texture op sourcing src[0] from the GRF reads one reg. */
+   if (!is_tex() || arg != 0 || src[0].file != GRF)
+      return 1;
+
+   /* At 16-wide the result is mlen halved, rounding up. */
+   const bool is_16_wide = v->dispatch_width == 16;
+   return is_16_wide ? (mlen + 1) / 2 : mlen;
+}
+
 /**
  * Returns how many MRFs an FS opcode will write over.
  *
@@ -710,6 +731,9 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
    if (inst->mlen == 0)
       return 0;
 
+   if (inst->base_mrf == -1)
+      return 0;
+
    switch (inst->opcode) {
    case SHADER_OPCODE_RCP:
    case SHADER_OPCODE_RSQ:
@@ -2194,6 +2218,13 @@ fs_visitor::register_coalesce()
 	    break;
 	 }
 
+	 if (scan_inst->mlen > 0 && scan_inst->base_mrf == -1 &&
+	     scan_inst->src[0].file == GRF &&
+	     scan_inst->src[0].reg == inst->dst.reg) {
+	    interfered = true;
+	    break;
+	 }
+
 	 /* The accumulator result appears to get used for the
 	  * conditional modifier generation.  When negating a UD
 	  * value, there is a 33rd bit generated for the sign in the
@@ -2382,7 +2413,7 @@ fs_visitor::compute_to_mrf()
 	    }
 	 }
 
-	 if (scan_inst->mlen > 0) {
+	 if (scan_inst->mlen > 0 && scan_inst->base_mrf != -1) {
 	    /* Found a SEND instruction, which means that there are
 	     * live values in MRFs from base_mrf to base_mrf +
 	     * scan_inst->mlen - 1.  Don't go pushing our MRF write up
@@ -2444,7 +2475,7 @@ fs_visitor::remove_duplicate_mrf_writes()
 	 last_mrf_move[inst->dst.reg] = NULL;
       }
 
-      if (inst->mlen > 0) {
+      if (inst->mlen > 0 && inst->base_mrf != -1) {
 	 /* Found a SEND instruction, which will include two or fewer
 	  * implied MRF writes.  We could do better here.
 	  */
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 360dbadc19d..c78f9ae7961 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -78,6 +78,7 @@ public:
    bool is_zero() const;
    bool is_one() const;
    bool is_valid_3src() const;
+   fs_reg retype(uint32_t type);
 
    /** Register file: GRF, MRF, IMM. */
    enum register_file file;
@@ -145,6 +146,7 @@ public:
    bool overwrites_reg(const fs_reg &reg);
    bool is_send_from_grf();
    bool is_partial_write();
+   int regs_read(fs_visitor *v, int arg);
 
    fs_reg dst;
    fs_reg src[3];
@@ -354,7 +356,8 @@ public:
    void try_replace_with_sel();
    void emit_bool_to_cond_code(ir_rvalue *condition);
    void emit_if_gen6(ir_if *ir);
-   void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset);
+   void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset,
+                     int count);
 
    void emit_fragment_program_code();
    void setup_fp_regs();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index fb6fe184f33..7b90982a2a2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -279,6 +279,9 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
    if (entry->src.file == IMM)
       return false;
 
+   if (inst->regs_read(this, arg) > 1)
+      return false;
+
    if (inst->src[arg].file != entry->dst.file ||
        inst->src[arg].reg != entry->dst.reg ||
        inst->src[arg].reg_offset != entry->dst.reg_offset) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index dbfbc113dc7..4b668f162aa 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -501,24 +501,43 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
       dst = vec16(dst);
    }
 
+   if (brw->gen >= 7 && inst->header_present && dispatch_width == 16) {
+      /* The send-from-GRF for 16-wide texturing with a header has an extra
+       * hardware register allocated to it, which we need to skip over (since
+       * our coordinates in the payload are in the even-numbered registers,
+       * and the header comes right before the first one).
+       */
+      assert(src.file == BRW_GENERAL_REGISTER_FILE);
+      src.nr++;
+   }
+
    /* Load the message header if present.  If there's a texture offset,
     * we need to set it up explicitly and load the offset bitfield.
     * Otherwise, we can use an implied move from g0 to the first message reg.
     */
    if (inst->texture_offset) {
+      struct brw_reg header_reg;
+
+      if (brw->gen >= 7) {
+         header_reg = src;
+      } else {
+         assert(inst->base_mrf != -1);
+         header_reg = retype(brw_message_reg(inst->base_mrf),
+                             BRW_REGISTER_TYPE_UD);
+      }
       brw_push_insn_state(p);
       brw_set_mask_control(p, BRW_MASK_DISABLE);
       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
       /* Explicitly set up the message header by copying g0 to the MRF. */
-      brw_MOV(p, retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
-                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+      brw_MOV(p, header_reg, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
 
       /* Then set the offset bits in DWord 2. */
-      brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
-                                     inst->base_mrf, 2), BRW_REGISTER_TYPE_UD),
+      brw_MOV(p, retype(brw_vec1_reg(header_reg.file,
+                                     header_reg.nr, 2), BRW_REGISTER_TYPE_UD),
                  brw_imm_ud(inst->texture_offset));
       brw_pop_insn_state(p);
    } else if (inst->header_present) {
+      assert(brw->gen < 7);
       /* Set up an implied move from g0 to the MRF. */
       src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
    }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index 50aa7a62ae3..b3026c26850 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -151,14 +151,7 @@ fs_live_variables::setup_def_use()
             if (reg.file != GRF)
                continue;
 
-            int regs_read = 1;
-            /* We don't know how many components are read in a send-from-grf,
-             * so just assume "all of them."
-             */
-            if (inst->is_send_from_grf())
-               regs_read = v->virtual_grf_sizes[reg.reg];
-
-            for (int i = 0; i < regs_read; i++) {
+            for (int j = 0; j < inst->regs_read(v, i); j++) {
                setup_one_read(block, inst, ip, reg);
                reg.reg_offset++;
             }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index f0f4ad9a928..157c9ae4ffa 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -512,19 +512,25 @@ fs_visitor::assign_regs()
 }
 
 void
-fs_visitor::emit_unspill(fs_inst *inst, fs_reg dst, uint32_t spill_offset)
+fs_visitor::emit_unspill(fs_inst *inst, fs_reg dst, uint32_t spill_offset,
+                         int count)
 {
-   fs_inst *unspill_inst = new(mem_ctx) fs_inst(FS_OPCODE_UNSPILL, dst);
-   unspill_inst->offset = spill_offset;
-   unspill_inst->ir = inst->ir;
-   unspill_inst->annotation = inst->annotation;
+   for (int i = 0; i < count; i++) {
+      fs_inst *unspill_inst = new(mem_ctx) fs_inst(FS_OPCODE_UNSPILL, dst);
+      unspill_inst->offset = spill_offset;
+      unspill_inst->ir = inst->ir;
+      unspill_inst->annotation = inst->annotation;
+
+      /* One register per iteration.  Use an MRF that can't conflict with one
+       * that's live across the spill; nothing else makes it up to MRF 14/15.
+       */
+      unspill_inst->base_mrf = 14;
+      unspill_inst->mlen = 1; /* header contains offset */
+      inst->insert_before(unspill_inst);
 
-   /* Choose a MRF that won't conflict with an MRF that's live across the
-    * spill.  Nothing else will make it up to MRF 14/15.
-    */
-   unspill_inst->base_mrf = 14;
-   unspill_inst->mlen = 1; /* header contains offset */
-   inst->insert_before(unspill_inst);
+      dst.reg_offset++;
+      spill_offset += REG_SIZE;
+   }
 }
 
 int
@@ -623,9 +629,14 @@ fs_visitor::spill_reg(int spill_reg)
       for (unsigned int i = 0; i < 3; i++) {
 	 if (inst->src[i].file == GRF &&
 	     inst->src[i].reg == spill_reg) {
-	    inst->src[i].reg = virtual_grf_alloc(1);
-	    emit_unspill(inst, inst->src[i],
-                         spill_offset + REG_SIZE * inst->src[i].reg_offset);
+            int regs_read = inst->regs_read(this, i);
+            /* Take the scratch offset before reg_offset is rebased to 0. */
+            uint32_t unspill_offset = spill_offset +
+                                      REG_SIZE * inst->src[i].reg_offset;
+            inst->src[i].reg = virtual_grf_alloc(regs_read);
+            inst->src[i].reg_offset = 0;
+
+            emit_unspill(inst, inst->src[i], unspill_offset, regs_read);
 	 }
       }
 
@@ -641,12 +652,8 @@ fs_visitor::spill_reg(int spill_reg)
           * since we write back out all of the regs_written().
 	  */
 	 if (inst->predicate || inst->force_uncompressed || inst->force_sechalf) {
-            fs_reg unspill_reg = inst->dst;
-            for (int chan = 0; chan < inst->regs_written; chan++) {
-               emit_unspill(inst, unspill_reg,
-                            subset_spill_offset + REG_SIZE * chan);
-               unspill_reg.reg_offset++;
-            }
+            emit_unspill(inst, inst->dst, subset_spill_offset,
+                         inst->regs_written);
 	 }
 
 	 fs_reg spill_src = inst->dst;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 728567cc2a8..e659203dd58 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1226,27 +1226,28 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
                               fs_reg shadow_c, fs_reg lod, fs_reg lod2,
                               fs_reg sample_index)
 {
-   int mlen = 0;
-   int base_mrf = 2;
    int reg_width = dispatch_width / 8;
    bool header_present = false;
    int offsets[3];
 
+   fs_reg payload = fs_reg(this, glsl_type::float_type);
+   fs_reg next = payload;
+
    if (ir->op == ir_tg4 || (ir->offset && ir->op != ir_txf)) {
-      /* * The offsets set up by the ir_texture visitor are in the
-       * m1 header, so we can't go headerless.
+      /* For general texture offsets (no txf workaround), we need a header to
+       * put them in.  Note that for 16-wide we're making space for two actual
+       * hardware registers here, so the emit will have to fix up for this.
        *
        * * ir4_tg4 needs to place its channel select in the header,
        * for interaction with ARB_texture_swizzle
        */
       header_present = true;
-      mlen++;
-      base_mrf--;
+      next.reg_offset++;
    }
 
    if (ir->shadow_comparitor) {
-      emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
-      mlen += reg_width;
+      emit(MOV(next, shadow_c));
+      next.reg_offset++;
    }
 
    /* Set up the LOD info */
@@ -1256,12 +1257,12 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    case ir_tg4:
       break;
    case ir_txb:
-      emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
-      mlen += reg_width;
+      emit(MOV(next, lod));
+      next.reg_offset++;
       break;
    case ir_txl:
-      emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
-      mlen += reg_width;
+      emit(MOV(next, lod));
+      next.reg_offset++;
       break;
    case ir_txd: {
       if (dispatch_width == 16)
@@ -1271,32 +1272,32 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
        * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
        */
       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-	 emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate));
+	 emit(MOV(next, coordinate));
 	 coordinate.reg_offset++;
-	 mlen += reg_width;
+	 next.reg_offset++;
 
          /* For cube map array, the coordinate is (u,v,r,ai) but there are
           * only derivatives for (u, v, r).
           */
          if (i < ir->lod_info.grad.dPdx->type->vector_elements) {
-            emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
+            emit(MOV(next, lod));
             lod.reg_offset++;
-            mlen += reg_width;
+            next.reg_offset++;
 
-            emit(MOV(fs_reg(MRF, base_mrf + mlen), lod2));
+            emit(MOV(next, lod2));
             lod2.reg_offset++;
-            mlen += reg_width;
+            next.reg_offset++;
          }
       }
       break;
    }
    case ir_txs:
-      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
-      mlen += reg_width;
+      emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), lod));
+      next.reg_offset++;
       break;
    case ir_query_levels:
-      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), fs_reg(0)));
-      mlen += reg_width;
+      emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), fs_reg(0u)));
+      next.reg_offset++;
       break;
    case ir_txf:
       /* It appears that the ld instruction used for txf does its
@@ -1314,40 +1315,37 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       }
 
       /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */
-      emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
-               coordinate, offsets[0]));
+      emit(ADD(next.retype(BRW_REGISTER_TYPE_D), coordinate, offsets[0]));
       coordinate.reg_offset++;
-      mlen += reg_width;
+      next.reg_offset++;
 
-      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), lod));
-      mlen += reg_width;
+      emit(MOV(next.retype(BRW_REGISTER_TYPE_D), lod));
+      next.reg_offset++;
 
       for (int i = 1; i < ir->coordinate->type->vector_elements; i++) {
-	 emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
-                  coordinate, offsets[i]));
+	 emit(ADD(next.retype(BRW_REGISTER_TYPE_D), coordinate, offsets[i]));
 	 coordinate.reg_offset++;
-	 mlen += reg_width;
+	 next.reg_offset++;
       }
       break;
    case ir_txf_ms:
-      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index));
-      mlen += reg_width;
+      emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), sample_index));
+      next.reg_offset++;
 
       /* constant zero MCS; we arrange to never actually have a compressed
        * multisample surface here for now. TODO: issue ld_mcs to get this first,
        * if we ever support texturing from compressed multisample surfaces
        */
-      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
-      mlen += reg_width;
+      emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), fs_reg(0u)));
+      next.reg_offset++;
 
       /* there is no offsetting for this message; just copy in the integer
        * texture coordinates
        */
       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-         emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
-                  coordinate));
+         emit(MOV(next.retype(BRW_REGISTER_TYPE_D), coordinate));
          coordinate.reg_offset++;
-         mlen += reg_width;
+         next.reg_offset++;
       }
       break;
    }
@@ -1355,32 +1353,37 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
    /* Set up the coordinate (except for cases where it was done above) */
    if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf && ir->op != ir_txf_ms && ir->op != ir_query_levels) {
       for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
-	 emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate));
+	 emit(MOV(next, coordinate));
 	 coordinate.reg_offset++;
-	 mlen += reg_width;
+	 next.reg_offset++;
       }
    }
 
    /* Generate the SEND */
    fs_inst *inst = NULL;
    switch (ir->op) {
-   case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst); break;
-   case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
-   case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst); break;
-   case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst); break;
-   case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break;
-   case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst); break;
-   case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break;
-   case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst); break;
-   case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst); break;
-   case ir_tg4: inst = emit(SHADER_OPCODE_TG4, dst); break;
-   }
-   inst->base_mrf = base_mrf;
-   inst->mlen = mlen;
+   case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst, payload); break;
+   case ir_txb: inst = emit(FS_OPCODE_TXB, dst, payload); break;
+   case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst, payload); break;
+   case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst, payload); break;
+   case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst, payload); break;
+   case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst, payload); break;
+   case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst, payload); break;
+   case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst, payload); break;
+   case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst, payload); break;
+   case ir_tg4: inst = emit(SHADER_OPCODE_TG4, dst, payload); break;
+   }
+   inst->base_mrf = -1;
+   if (reg_width == 2)
+      inst->mlen = next.reg_offset * reg_width - header_present;
+   else
+      inst->mlen = next.reg_offset * reg_width;
+
    inst->header_present = header_present;
    inst->regs_written = 4;
 
-   if (mlen > 11) {
+   virtual_grf_sizes[payload.reg] = next.reg_offset;
+   if (inst->mlen > 11) {
       fail("Message length >11 disallowed by hardware\n");
    }
 
@@ -1591,9 +1594,6 @@ fs_visitor::visit(ir_texture *ir)
                                lod, lod2);
    }
 
-   /* The header is set up by generate_tex() when necessary. */
-   inst->src[0] = reg_undef;
-
    if (ir->offset != NULL && ir->op != ir_txf)
       inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
 
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 735ad93561e..b24c38c351d 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -569,7 +569,7 @@ fs_instruction_scheduler::calculate_deps()
       for (int i = 0; i < 3; i++) {
 	 if (inst->src[i].file == GRF) {
             if (post_reg_alloc) {
-               for (int r = 0; r < reg_width; r++)
+               for (int r = 0; r < reg_width * inst->regs_read(v, i); r++)
                   add_dep(last_grf_write[inst->src[i].reg + r], n);
             } else {
                add_dep(last_grf_write[inst->src[i].reg], n);
@@ -594,12 +594,14 @@ fs_instruction_scheduler::calculate_deps()
 	 }
       }
 
-      for (int i = 0; i < inst->mlen; i++) {
-	 /* It looks like the MRF regs are released in the send
-	  * instruction once it's sent, not when the result comes
-	  * back.
-	  */
-	 add_dep(last_mrf_write[inst->base_mrf + i], n);
+      if (inst->base_mrf != -1) {
+	 for (int i = 0; i < inst->mlen; i++) {
+	    /* It looks like the MRF regs are released in the send
+	     * instruction once it's sent, not when the result comes
+	     * back.
+	     */
+	    add_dep(last_mrf_write[inst->base_mrf + i], n);
+	 }
       }
 
       if (inst->predicate) {
@@ -642,7 +644,7 @@ fs_instruction_scheduler::calculate_deps()
 	 add_barrier_deps(n);
       }
 
-      if (inst->mlen > 0) {
+      if (inst->mlen > 0 && inst->base_mrf != -1) {
 	 for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
 	    add_dep(last_mrf_write[inst->base_mrf + i], n);
 	    last_mrf_write[inst->base_mrf + i] = n;
@@ -677,7 +679,7 @@ fs_instruction_scheduler::calculate_deps()
       for (int i = 0; i < 3; i++) {
 	 if (inst->src[i].file == GRF) {
             if (post_reg_alloc) {
-               for (int r = 0; r < reg_width; r++)
+               for (int r = 0; r < reg_width * inst->regs_read(v, i); r++)
                   add_dep(n, last_grf_write[inst->src[i].reg + r]);
             } else {
                add_dep(n, last_grf_write[inst->src[i].reg]);
@@ -702,12 +704,14 @@ fs_instruction_scheduler::calculate_deps()
 	 }
       }
 
-      for (int i = 0; i < inst->mlen; i++) {
-	 /* It looks like the MRF regs are released in the send
-	  * instruction once it's sent, not when the result comes
-	  * back.
-	  */
-	 add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
+      if (inst->base_mrf != -1) {
+	 for (int i = 0; i < inst->mlen; i++) {
+	    /* It looks like the MRF regs are released in the send
+	     * instruction once it's sent, not when the result comes
+	     * back.
+	     */
+	    add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
+	 }
       }
 
       if (inst->predicate) {
@@ -749,7 +753,7 @@ fs_instruction_scheduler::calculate_deps()
 	 add_barrier_deps(n);
       }
 
-      if (inst->mlen > 0) {
+      if (inst->mlen > 0 && inst->base_mrf != -1) {
 	 for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
 	    last_mrf_write[inst->base_mrf + i] = n;
 	 }