summary | refs | log | tree | commit | diff | stats
path: root/src/mesa/drivers/dri/i965
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu_emit.c | 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp | 37
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.h | 5
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 3
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 27
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp | 9
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 47
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 112
-rw-r--r-- src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 36
9 files changed, 170 insertions, 109 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 33245d701e0..8efd6796a97 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2193,7 +2193,8 @@ void brw_SAMPLE(struct brw_compile *p,
struct brw_context *brw = p->brw;
struct brw_instruction *insn;
- gen6_resolve_implied_move(p, &src0, msg_reg_nr);
+ if (msg_reg_nr != -1)
+ gen6_resolve_implied_move(p, &src0, msg_reg_nr);
insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = 0; /* XXX */
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index da31b3e5eb9..e5d6e4b281e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -355,7 +355,8 @@ fs_inst::is_send_from_grf()
return (opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7 ||
opcode == SHADER_OPCODE_SHADER_TIME_ADD ||
(opcode == FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD &&
- src[1].file == GRF));
+ src[1].file == GRF) ||
+ (is_tex() && src[0].file == GRF));
}
bool
@@ -436,6 +437,14 @@ fs_reg::equals(const fs_reg &r) const
imm.u == r.imm.u);
}
+fs_reg
+fs_reg::retype(uint32_t type)
+{
+ fs_reg result = *this;
+ result.type = type;
+ return result;
+}
+
bool
fs_reg::is_zero() const
{
@@ -698,6 +707,18 @@ fs_inst::is_partial_write()
this->force_sechalf);
}
+int
+fs_inst::regs_read(fs_visitor *v, int arg)
+{
+ if (is_tex() && arg == 0 && src[0].file == GRF) {
+ if (v->dispatch_width == 16)
+ return (mlen + 1) / 2;
+ else
+ return mlen;
+ }
+ return 1;
+}
+
/**
* Returns how many MRFs an FS opcode will write over.
*
@@ -710,6 +731,9 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
if (inst->mlen == 0)
return 0;
+ if (inst->base_mrf == -1)
+ return 0;
+
switch (inst->opcode) {
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_RSQ:
@@ -2194,6 +2218,13 @@ fs_visitor::register_coalesce()
break;
}
+ if (scan_inst->mlen > 0 && scan_inst->base_mrf == -1 &&
+ scan_inst->src[0].file == GRF &&
+ scan_inst->src[0].reg == inst->dst.reg) {
+ interfered = true;
+ break;
+ }
+
/* The accumulator result appears to get used for the
* conditional modifier generation. When negating a UD
* value, there is a 33rd bit generated for the sign in the
@@ -2382,7 +2413,7 @@ fs_visitor::compute_to_mrf()
}
}
- if (scan_inst->mlen > 0) {
+ if (scan_inst->mlen > 0 && scan_inst->base_mrf != -1) {
/* Found a SEND instruction, which means that there are
* live values in MRFs from base_mrf to base_mrf +
* scan_inst->mlen - 1. Don't go pushing our MRF write up
@@ -2444,7 +2475,7 @@ fs_visitor::remove_duplicate_mrf_writes()
last_mrf_move[inst->dst.reg] = NULL;
}
- if (inst->mlen > 0) {
+ if (inst->mlen > 0 && inst->base_mrf != -1) {
/* Found a SEND instruction, which will include two or fewer
* implied MRF writes. We could do better here.
*/
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 360dbadc19d..c78f9ae7961 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -78,6 +78,7 @@ public:
bool is_zero() const;
bool is_one() const;
bool is_valid_3src() const;
+ fs_reg retype(uint32_t type);
/** Register file: GRF, MRF, IMM. */
enum register_file file;
@@ -145,6 +146,7 @@ public:
bool overwrites_reg(const fs_reg &reg);
bool is_send_from_grf();
bool is_partial_write();
+ int regs_read(fs_visitor *v, int arg);
fs_reg dst;
fs_reg src[3];
@@ -354,7 +356,8 @@ public:
void try_replace_with_sel();
void emit_bool_to_cond_code(ir_rvalue *condition);
void emit_if_gen6(ir_if *ir);
- void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset);
+ void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset,
+ int count);
void emit_fragment_program_code();
void setup_fp_regs();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index fb6fe184f33..7b90982a2a2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -279,6 +279,9 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
if (entry->src.file == IMM)
return false;
+ if (inst->regs_read(this, arg) > 1)
+ return false;
+
if (inst->src[arg].file != entry->dst.file ||
inst->src[arg].reg != entry->dst.reg ||
inst->src[arg].reg_offset != entry->dst.reg_offset) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index dbfbc113dc7..4b668f162aa 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -501,24 +501,43 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
dst = vec16(dst);
}
+ if (brw->gen >= 7 && inst->header_present && dispatch_width == 16) {
+ /* The send-from-GRF for 16-wide texturing with a header has an extra
+ * hardware register allocated to it, which we need to skip over (since
+ * our coordinates in the payload are in the even-numbered registers,
+ * and the header comes right before the first one).
+ */
+ assert(src.file == BRW_GENERAL_REGISTER_FILE);
+ src.nr++;
+ }
+
/* Load the message header if present. If there's a texture offset,
* we need to set it up explicitly and load the offset bitfield.
* Otherwise, we can use an implied move from g0 to the first message reg.
*/
if (inst->texture_offset) {
+ struct brw_reg header_reg;
+
+ if (brw->gen >= 7) {
+ header_reg = src;
+ } else {
+ assert(inst->base_mrf != -1);
+ header_reg = retype(brw_message_reg(inst->base_mrf),
+ BRW_REGISTER_TYPE_UD);
+ }
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
/* Explicitly set up the message header by copying g0 to the MRF. */
- brw_MOV(p, retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
- retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+ brw_MOV(p, header_reg, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* Then set the offset bits in DWord 2. */
- brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
- inst->base_mrf, 2), BRW_REGISTER_TYPE_UD),
+ brw_MOV(p, retype(brw_vec1_reg(header_reg.file,
+ header_reg.nr, 2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(inst->texture_offset));
brw_pop_insn_state(p);
} else if (inst->header_present) {
+ assert(brw->gen < 7);
/* Set up an implied move from g0 to the MRF. */
src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index 50aa7a62ae3..b3026c26850 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -151,14 +151,7 @@ fs_live_variables::setup_def_use()
if (reg.file != GRF)
continue;
- int regs_read = 1;
- /* We don't know how many components are read in a send-from-grf,
- * so just assume "all of them."
- */
- if (inst->is_send_from_grf())
- regs_read = v->virtual_grf_sizes[reg.reg];
-
- for (int i = 0; i < regs_read; i++) {
+ for (int j = 0; j < inst->regs_read(v, i); j++) {
setup_one_read(block, inst, ip, reg);
reg.reg_offset++;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index f0f4ad9a928..157c9ae4ffa 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -512,19 +512,25 @@ fs_visitor::assign_regs()
}
void
-fs_visitor::emit_unspill(fs_inst *inst, fs_reg dst, uint32_t spill_offset)
+fs_visitor::emit_unspill(fs_inst *inst, fs_reg dst, uint32_t spill_offset,
+ int count)
{
- fs_inst *unspill_inst = new(mem_ctx) fs_inst(FS_OPCODE_UNSPILL, dst);
- unspill_inst->offset = spill_offset;
- unspill_inst->ir = inst->ir;
- unspill_inst->annotation = inst->annotation;
+ for (int i = 0; i < count; i++) {
+ fs_inst *unspill_inst = new(mem_ctx) fs_inst(FS_OPCODE_UNSPILL, dst);
+ unspill_inst->offset = spill_offset;
+ unspill_inst->ir = inst->ir;
+ unspill_inst->annotation = inst->annotation;
+
+ /* Choose a MRF that won't conflict with an MRF that's live across the
+ * spill. Nothing else will make it up to MRF 14/15.
+ */
+ unspill_inst->base_mrf = 14;
+ unspill_inst->mlen = 1; /* header contains offset */
+ inst->insert_before(unspill_inst);
- /* Choose a MRF that won't conflict with an MRF that's live across the
- * spill. Nothing else will make it up to MRF 14/15.
- */
- unspill_inst->base_mrf = 14;
- unspill_inst->mlen = 1; /* header contains offset */
- inst->insert_before(unspill_inst);
+ dst.reg_offset++;
+ spill_offset += REG_SIZE;
+ }
}
int
@@ -623,9 +629,14 @@ fs_visitor::spill_reg(int spill_reg)
for (unsigned int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF &&
inst->src[i].reg == spill_reg) {
- inst->src[i].reg = virtual_grf_alloc(1);
- emit_unspill(inst, inst->src[i],
- spill_offset + REG_SIZE * inst->src[i].reg_offset);
+ int regs_read = inst->regs_read(this, i);
+
+ inst->src[i].reg = virtual_grf_alloc(regs_read);
+ inst->src[i].reg_offset = 0;
+
+ emit_unspill(inst, inst->src[i],
+ spill_offset + REG_SIZE * inst->src[i].reg_offset,
+ regs_read);
}
}
@@ -641,12 +652,8 @@ fs_visitor::spill_reg(int spill_reg)
* since we write back out all of the regs_written().
*/
if (inst->predicate || inst->force_uncompressed || inst->force_sechalf) {
- fs_reg unspill_reg = inst->dst;
- for (int chan = 0; chan < inst->regs_written; chan++) {
- emit_unspill(inst, unspill_reg,
- subset_spill_offset + REG_SIZE * chan);
- unspill_reg.reg_offset++;
- }
+ emit_unspill(inst, inst->dst, subset_spill_offset,
+ inst->regs_written);
}
fs_reg spill_src = inst->dst;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 728567cc2a8..e659203dd58 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1226,27 +1226,28 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
fs_reg shadow_c, fs_reg lod, fs_reg lod2,
fs_reg sample_index)
{
- int mlen = 0;
- int base_mrf = 2;
int reg_width = dispatch_width / 8;
bool header_present = false;
int offsets[3];
+ fs_reg payload = fs_reg(this, glsl_type::float_type);
+ fs_reg next = payload;
+
if (ir->op == ir_tg4 || (ir->offset && ir->op != ir_txf)) {
- /* * The offsets set up by the ir_texture visitor are in the
- * m1 header, so we can't go headerless.
+ /* For general texture offsets (no txf workaround), we need a header to
+ * put them in. Note that for 16-wide we're making space for two actual
+ * hardware registers here, so the emit will have to fix up for this.
*
* * ir4_tg4 needs to place its channel select in the header,
* for interaction with ARB_texture_swizzle
*/
header_present = true;
- mlen++;
- base_mrf--;
+ next.reg_offset++;
}
if (ir->shadow_comparitor) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
- mlen += reg_width;
+ emit(MOV(next, shadow_c));
+ next.reg_offset++;
}
/* Set up the LOD info */
@@ -1256,12 +1257,12 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
case ir_tg4:
break;
case ir_txb:
- emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
- mlen += reg_width;
+ emit(MOV(next, lod));
+ next.reg_offset++;
break;
case ir_txl:
- emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
- mlen += reg_width;
+ emit(MOV(next, lod));
+ next.reg_offset++;
break;
case ir_txd: {
if (dispatch_width == 16)
@@ -1271,32 +1272,32 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
* [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
*/
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate));
+ emit(MOV(next, coordinate));
coordinate.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
/* For cube map array, the coordinate is (u,v,r,ai) but there are
* only derivatives for (u, v, r).
*/
if (i < ir->lod_info.grad.dPdx->type->vector_elements) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
+ emit(MOV(next, lod));
lod.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
- emit(MOV(fs_reg(MRF, base_mrf + mlen), lod2));
+ emit(MOV(next, lod2));
lod2.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
}
}
break;
}
case ir_txs:
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
- mlen += reg_width;
+ emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), lod));
+ next.reg_offset++;
break;
case ir_query_levels:
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), fs_reg(0)));
- mlen += reg_width;
+ emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), fs_reg(0u)));
+ next.reg_offset++;
break;
case ir_txf:
/* It appears that the ld instruction used for txf does its
@@ -1314,40 +1315,37 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
}
/* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */
- emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
- coordinate, offsets[0]));
+ emit(ADD(next.retype(BRW_REGISTER_TYPE_D), coordinate, offsets[0]));
coordinate.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), lod));
- mlen += reg_width;
+ emit(MOV(next.retype(BRW_REGISTER_TYPE_D), lod));
+ next.reg_offset++;
for (int i = 1; i < ir->coordinate->type->vector_elements; i++) {
- emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
- coordinate, offsets[i]));
+ emit(ADD(next.retype(BRW_REGISTER_TYPE_D), coordinate, offsets[i]));
coordinate.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
}
break;
case ir_txf_ms:
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index));
- mlen += reg_width;
+ emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), sample_index));
+ next.reg_offset++;
/* constant zero MCS; we arrange to never actually have a compressed
* multisample surface here for now. TODO: issue ld_mcs to get this first,
* if we ever support texturing from compressed multisample surfaces
*/
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
- mlen += reg_width;
+ emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), fs_reg(0u)));
+ next.reg_offset++;
/* there is no offsetting for this message; just copy in the integer
* texture coordinates
*/
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
- coordinate));
+ emit(MOV(next.retype(BRW_REGISTER_TYPE_D), coordinate));
coordinate.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
}
break;
}
@@ -1355,32 +1353,37 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
/* Set up the coordinate (except for cases where it was done above) */
if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf && ir->op != ir_txf_ms && ir->op != ir_query_levels) {
for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
- emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate));
+ emit(MOV(next, coordinate));
coordinate.reg_offset++;
- mlen += reg_width;
+ next.reg_offset++;
}
}
/* Generate the SEND */
fs_inst *inst = NULL;
switch (ir->op) {
- case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst); break;
- case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
- case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst); break;
- case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst); break;
- case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break;
- case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst); break;
- case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break;
- case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst); break;
- case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst); break;
- case ir_tg4: inst = emit(SHADER_OPCODE_TG4, dst); break;
- }
- inst->base_mrf = base_mrf;
- inst->mlen = mlen;
+ case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst, payload); break;
+ case ir_txb: inst = emit(FS_OPCODE_TXB, dst, payload); break;
+ case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst, payload); break;
+ case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst, payload); break;
+ case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst, payload); break;
+ case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst, payload); break;
+ case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst, payload); break;
+ case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst, payload); break;
+ case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst, payload); break;
+ case ir_tg4: inst = emit(SHADER_OPCODE_TG4, dst, payload); break;
+ }
+ inst->base_mrf = -1;
+ if (reg_width == 2)
+ inst->mlen = next.reg_offset * reg_width - header_present;
+ else
+ inst->mlen = next.reg_offset * reg_width;
+
inst->header_present = header_present;
inst->regs_written = 4;
- if (mlen > 11) {
+ virtual_grf_sizes[payload.reg] = next.reg_offset;
+ if (inst->mlen > 11) {
fail("Message length >11 disallowed by hardware\n");
}
@@ -1591,9 +1594,6 @@ fs_visitor::visit(ir_texture *ir)
lod, lod2);
}
- /* The header is set up by generate_tex() when necessary. */
- inst->src[0] = reg_undef;
-
if (ir->offset != NULL && ir->op != ir_txf)
inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 735ad93561e..b24c38c351d 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -569,7 +569,7 @@ fs_instruction_scheduler::calculate_deps()
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
if (post_reg_alloc) {
- for (int r = 0; r < reg_width; r++)
+ for (int r = 0; r < reg_width * inst->regs_read(v, i); r++)
add_dep(last_grf_write[inst->src[i].reg + r], n);
} else {
add_dep(last_grf_write[inst->src[i].reg], n);
@@ -594,12 +594,14 @@ fs_instruction_scheduler::calculate_deps()
}
}
- for (int i = 0; i < inst->mlen; i++) {
- /* It looks like the MRF regs are released in the send
- * instruction once it's sent, not when the result comes
- * back.
- */
- add_dep(last_mrf_write[inst->base_mrf + i], n);
+ if (inst->base_mrf != -1) {
+ for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+ * instruction once it's sent, not when the result comes
+ * back.
+ */
+ add_dep(last_mrf_write[inst->base_mrf + i], n);
+ }
}
if (inst->predicate) {
@@ -642,7 +644,7 @@ fs_instruction_scheduler::calculate_deps()
add_barrier_deps(n);
}
- if (inst->mlen > 0) {
+ if (inst->mlen > 0 && inst->base_mrf != -1) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
add_dep(last_mrf_write[inst->base_mrf + i], n);
last_mrf_write[inst->base_mrf + i] = n;
@@ -677,7 +679,7 @@ fs_instruction_scheduler::calculate_deps()
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
if (post_reg_alloc) {
- for (int r = 0; r < reg_width; r++)
+ for (int r = 0; r < reg_width * inst->regs_read(v, i); r++)
add_dep(n, last_grf_write[inst->src[i].reg + r]);
} else {
add_dep(n, last_grf_write[inst->src[i].reg]);
@@ -702,12 +704,14 @@ fs_instruction_scheduler::calculate_deps()
}
}
- for (int i = 0; i < inst->mlen; i++) {
- /* It looks like the MRF regs are released in the send
- * instruction once it's sent, not when the result comes
- * back.
- */
- add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
+ if (inst->base_mrf != -1) {
+ for (int i = 0; i < inst->mlen; i++) {
+ /* It looks like the MRF regs are released in the send
+ * instruction once it's sent, not when the result comes
+ * back.
+ */
+ add_dep(n, last_mrf_write[inst->base_mrf + i], 2);
+ }
}
if (inst->predicate) {
@@ -749,7 +753,7 @@ fs_instruction_scheduler::calculate_deps()
add_barrier_deps(n);
}
- if (inst->mlen > 0) {
+ if (inst->mlen > 0 && inst->base_mrf != -1) {
for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
last_mrf_write[inst->base_mrf + i] = n;
}