diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_emit.c | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 37 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 5 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 27 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp | 9 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 47 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 112 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 36 |
9 files changed, 170 insertions, 109 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 33245d701e0..8efd6796a97 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2193,7 +2193,8 @@ void brw_SAMPLE(struct brw_compile *p, struct brw_context *brw = p->brw; struct brw_instruction *insn; - gen6_resolve_implied_move(p, &src0, msg_reg_nr); + if (msg_reg_nr != -1) + gen6_resolve_implied_move(p, &src0, msg_reg_nr); insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index da31b3e5eb9..e5d6e4b281e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -355,7 +355,8 @@ fs_inst::is_send_from_grf() return (opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7 || opcode == SHADER_OPCODE_SHADER_TIME_ADD || (opcode == FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD && - src[1].file == GRF)); + src[1].file == GRF) || + (is_tex() && src[0].file == GRF)); } bool @@ -436,6 +437,14 @@ fs_reg::equals(const fs_reg &r) const imm.u == r.imm.u); } +fs_reg +fs_reg::retype(uint32_t type) +{ + fs_reg result = *this; + result.type = type; + return result; +} + bool fs_reg::is_zero() const { @@ -698,6 +707,18 @@ fs_inst::is_partial_write() this->force_sechalf); } +int +fs_inst::regs_read(fs_visitor *v, int arg) +{ + if (is_tex() && arg == 0 && src[0].file == GRF) { + if (v->dispatch_width == 16) + return (mlen + 1) / 2; + else + return mlen; + } + return 1; +} + /** * Returns how many MRFs an FS opcode will write over. 
* @@ -710,6 +731,9 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) if (inst->mlen == 0) return 0; + if (inst->base_mrf == -1) + return 0; + switch (inst->opcode) { case SHADER_OPCODE_RCP: case SHADER_OPCODE_RSQ: @@ -2194,6 +2218,13 @@ fs_visitor::register_coalesce() break; } + if (scan_inst->mlen > 0 && scan_inst->base_mrf == -1 && + scan_inst->src[0].file == GRF && + scan_inst->src[0].reg == inst->dst.reg) { + interfered = true; + break; + } + /* The accumulator result appears to get used for the * conditional modifier generation. When negating a UD * value, there is a 33rd bit generated for the sign in the @@ -2382,7 +2413,7 @@ fs_visitor::compute_to_mrf() } } - if (scan_inst->mlen > 0) { + if (scan_inst->mlen > 0 && scan_inst->base_mrf != -1) { /* Found a SEND instruction, which means that there are * live values in MRFs from base_mrf to base_mrf + * scan_inst->mlen - 1. Don't go pushing our MRF write up @@ -2444,7 +2475,7 @@ fs_visitor::remove_duplicate_mrf_writes() last_mrf_move[inst->dst.reg] = NULL; } - if (inst->mlen > 0) { + if (inst->mlen > 0 && inst->base_mrf != -1) { /* Found a SEND instruction, which will include two or fewer * implied MRF writes. We could do better here. */ diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 360dbadc19d..c78f9ae7961 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -78,6 +78,7 @@ public: bool is_zero() const; bool is_one() const; bool is_valid_3src() const; + fs_reg retype(uint32_t type); /** Register file: GRF, MRF, IMM. 
*/ enum register_file file; @@ -145,6 +146,7 @@ public: bool overwrites_reg(const fs_reg &reg); bool is_send_from_grf(); bool is_partial_write(); + int regs_read(fs_visitor *v, int arg); fs_reg dst; fs_reg src[3]; @@ -354,7 +356,8 @@ public: void try_replace_with_sel(); void emit_bool_to_cond_code(ir_rvalue *condition); void emit_if_gen6(ir_if *ir); - void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset); + void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset, + int count); void emit_fragment_program_code(); void setup_fp_regs(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index fb6fe184f33..7b90982a2a2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -279,6 +279,9 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) if (entry->src.file == IMM) return false; + if (inst->regs_read(this, arg) > 1) + return false; + if (inst->src[arg].file != entry->dst.file || inst->src[arg].reg != entry->dst.reg || inst->src[arg].reg_offset != entry->dst.reg_offset) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index dbfbc113dc7..4b668f162aa 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -501,24 +501,43 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src dst = vec16(dst); } + if (brw->gen >= 7 && inst->header_present && dispatch_width == 16) { + /* The send-from-GRF for 16-wide texturing with a header has an extra + * hardware register allocated to it, which we need to skip over (since + * our coordinates in the payload are in the even-numbered registers, + * and the header comes right before the first one). 
+ */ + assert(src.file == BRW_GENERAL_REGISTER_FILE); + src.nr++; + } + /* Load the message header if present. If there's a texture offset, * we need to set it up explicitly and load the offset bitfield. * Otherwise, we can use an implied move from g0 to the first message reg. */ if (inst->texture_offset) { + struct brw_reg header_reg; + + if (brw->gen >= 7) { + header_reg = src; + } else { + assert(inst->base_mrf != -1); + header_reg = retype(brw_message_reg(inst->base_mrf), + BRW_REGISTER_TYPE_UD); + } brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* Explicitly set up the message header by copying g0 to the MRF. */ - brw_MOV(p, retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD), - retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + brw_MOV(p, header_reg, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); /* Then set the offset bits in DWord 2. */ - brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, - inst->base_mrf, 2), BRW_REGISTER_TYPE_UD), + brw_MOV(p, retype(brw_vec1_reg(header_reg.file, + header_reg.nr, 2), BRW_REGISTER_TYPE_UD), brw_imm_ud(inst->texture_offset)); brw_pop_insn_state(p); } else if (inst->header_present) { + assert(brw->gen < 7); /* Set up an implied move from g0 to the MRF. */ src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp index 50aa7a62ae3..b3026c26850 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp @@ -151,14 +151,7 @@ fs_live_variables::setup_def_use() if (reg.file != GRF) continue; - int regs_read = 1; - /* We don't know how many components are read in a send-from-grf, - * so just assume "all of them." 
- */ - if (inst->is_send_from_grf()) - regs_read = v->virtual_grf_sizes[reg.reg]; - - for (int i = 0; i < regs_read; i++) { + for (int j = 0; j < inst->regs_read(v, i); j++) { setup_one_read(block, inst, ip, reg); reg.reg_offset++; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index f0f4ad9a928..157c9ae4ffa 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -512,19 +512,25 @@ fs_visitor::assign_regs() } void -fs_visitor::emit_unspill(fs_inst *inst, fs_reg dst, uint32_t spill_offset) +fs_visitor::emit_unspill(fs_inst *inst, fs_reg dst, uint32_t spill_offset, + int count) { - fs_inst *unspill_inst = new(mem_ctx) fs_inst(FS_OPCODE_UNSPILL, dst); - unspill_inst->offset = spill_offset; - unspill_inst->ir = inst->ir; - unspill_inst->annotation = inst->annotation; + for (int i = 0; i < count; i++) { + fs_inst *unspill_inst = new(mem_ctx) fs_inst(FS_OPCODE_UNSPILL, dst); + unspill_inst->offset = spill_offset; + unspill_inst->ir = inst->ir; + unspill_inst->annotation = inst->annotation; + + /* Choose a MRF that won't conflict with an MRF that's live across the + * spill. Nothing else will make it up to MRF 14/15. + */ + unspill_inst->base_mrf = 14; + unspill_inst->mlen = 1; /* header contains offset */ + inst->insert_before(unspill_inst); - /* Choose a MRF that won't conflict with an MRF that's live across the - * spill. Nothing else will make it up to MRF 14/15. 
- */ - unspill_inst->base_mrf = 14; - unspill_inst->mlen = 1; /* header contains offset */ - inst->insert_before(unspill_inst); + dst.reg_offset++; + spill_offset += REG_SIZE; + } } int @@ -623,9 +629,14 @@ fs_visitor::spill_reg(int spill_reg) for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg) { - inst->src[i].reg = virtual_grf_alloc(1); - emit_unspill(inst, inst->src[i], - spill_offset + REG_SIZE * inst->src[i].reg_offset); + int regs_read = inst->regs_read(this, i); + + inst->src[i].reg = virtual_grf_alloc(regs_read); + inst->src[i].reg_offset = 0; + + emit_unspill(inst, inst->src[i], + spill_offset + REG_SIZE * inst->src[i].reg_offset, + regs_read); } } @@ -641,12 +652,8 @@ fs_visitor::spill_reg(int spill_reg) * since we write back out all of the regs_written(). */ if (inst->predicate || inst->force_uncompressed || inst->force_sechalf) { - fs_reg unspill_reg = inst->dst; - for (int chan = 0; chan < inst->regs_written; chan++) { - emit_unspill(inst, unspill_reg, - subset_spill_offset + REG_SIZE * chan); - unspill_reg.reg_offset++; - } + emit_unspill(inst, inst->dst, subset_spill_offset, + inst->regs_written); } fs_reg spill_src = inst->dst; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 728567cc2a8..e659203dd58 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1226,27 +1226,28 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, fs_reg shadow_c, fs_reg lod, fs_reg lod2, fs_reg sample_index) { - int mlen = 0; - int base_mrf = 2; int reg_width = dispatch_width / 8; bool header_present = false; int offsets[3]; + fs_reg payload = fs_reg(this, glsl_type::float_type); + fs_reg next = payload; + if (ir->op == ir_tg4 || (ir->offset && ir->op != ir_txf)) { - /* * The offsets set up by the ir_texture visitor are in the - * m1 header, so we can't go headerless. 
+ /* For general texture offsets (no txf workaround), we need a header to + * put them in. Note that for 16-wide we're making space for two actual + * hardware registers here, so the emit will have to fix up for this. * * * ir4_tg4 needs to place its channel select in the header, * for interaction with ARB_texture_swizzle */ header_present = true; - mlen++; - base_mrf--; + next.reg_offset++; } if (ir->shadow_comparitor) { - emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c)); - mlen += reg_width; + emit(MOV(next, shadow_c)); + next.reg_offset++; } /* Set up the LOD info */ @@ -1256,12 +1257,12 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_tg4: break; case ir_txb: - emit(MOV(fs_reg(MRF, base_mrf + mlen), lod)); - mlen += reg_width; + emit(MOV(next, lod)); + next.reg_offset++; break; case ir_txl: - emit(MOV(fs_reg(MRF, base_mrf + mlen), lod)); - mlen += reg_width; + emit(MOV(next, lod)); + next.reg_offset++; break; case ir_txd: { if (dispatch_width == 16) @@ -1271,32 +1272,32 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z */ for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate)); + emit(MOV(next, coordinate)); coordinate.reg_offset++; - mlen += reg_width; + next.reg_offset++; /* For cube map array, the coordinate is (u,v,r,ai) but there are * only derivatives for (u, v, r). 
*/ if (i < ir->lod_info.grad.dPdx->type->vector_elements) { - emit(MOV(fs_reg(MRF, base_mrf + mlen), lod)); + emit(MOV(next, lod)); lod.reg_offset++; - mlen += reg_width; + next.reg_offset++; - emit(MOV(fs_reg(MRF, base_mrf + mlen), lod2)); + emit(MOV(next, lod2)); lod2.reg_offset++; - mlen += reg_width; + next.reg_offset++; } } break; } case ir_txs: - emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod)); - mlen += reg_width; + emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), lod)); + next.reg_offset++; break; case ir_query_levels: - emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), fs_reg(0))); - mlen += reg_width; + emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), fs_reg(0u))); + next.reg_offset++; break; case ir_txf: /* It appears that the ld instruction used for txf does its @@ -1314,40 +1315,37 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, } /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */ - emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), - coordinate, offsets[0])); + emit(ADD(next.retype(BRW_REGISTER_TYPE_D), coordinate, offsets[0])); coordinate.reg_offset++; - mlen += reg_width; + next.reg_offset++; - emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), lod)); - mlen += reg_width; + emit(MOV(next.retype(BRW_REGISTER_TYPE_D), lod)); + next.reg_offset++; for (int i = 1; i < ir->coordinate->type->vector_elements; i++) { - emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), - coordinate, offsets[i])); + emit(ADD(next.retype(BRW_REGISTER_TYPE_D), coordinate, offsets[i])); coordinate.reg_offset++; - mlen += reg_width; + next.reg_offset++; } break; case ir_txf_ms: - emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index)); - mlen += reg_width; + emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), sample_index)); + next.reg_offset++; /* constant zero MCS; we arrange to never actually have a compressed * multisample surface here for now. 
TODO: issue ld_mcs to get this first, * if we ever support texturing from compressed multisample surfaces */ - emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), fs_reg(0u))); - mlen += reg_width; + emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), fs_reg(0u))); + next.reg_offset++; /* there is no offsetting for this message; just copy in the integer * texture coordinates */ for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), - coordinate)); + emit(MOV(next.retype(BRW_REGISTER_TYPE_D), coordinate)); coordinate.reg_offset++; - mlen += reg_width; + next.reg_offset++; } break; } @@ -1355,32 +1353,37 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, /* Set up the coordinate (except for cases where it was done above) */ if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf && ir->op != ir_txf_ms && ir->op != ir_query_levels) { for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { - emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate)); + emit(MOV(next, coordinate)); coordinate.reg_offset++; - mlen += reg_width; + next.reg_offset++; } } /* Generate the SEND */ fs_inst *inst = NULL; switch (ir->op) { - case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst); break; - case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break; - case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst); break; - case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst); break; - case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break; - case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst); break; - case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break; - case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst); break; - case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst); break; - case ir_tg4: inst = emit(SHADER_OPCODE_TG4, dst); break; - } - inst->base_mrf = base_mrf; - inst->mlen = mlen; + case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst, payload); break; + case ir_txb: inst = 
emit(FS_OPCODE_TXB, dst, payload); break; + case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst, payload); break; + case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst, payload); break; + case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst, payload); break; + case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst, payload); break; + case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst, payload); break; + case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst, payload); break; + case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst, payload); break; + case ir_tg4: inst = emit(SHADER_OPCODE_TG4, dst, payload); break; + } + inst->base_mrf = -1; + if (reg_width == 2) + inst->mlen = next.reg_offset * reg_width - header_present; + else + inst->mlen = next.reg_offset * reg_width; + inst->header_present = header_present; inst->regs_written = 4; - if (mlen > 11) { + virtual_grf_sizes[payload.reg] = next.reg_offset; + if (inst->mlen > 11) { fail("Message length >11 disallowed by hardware\n"); } @@ -1591,9 +1594,6 @@ fs_visitor::visit(ir_texture *ir) lod, lod2); } - /* The header is set up by generate_tex() when necessary. 
*/ - inst->src[0] = reg_undef; - if (ir->offset != NULL && ir->op != ir_txf) inst->texture_offset = brw_texture_offset(ir->offset->as_constant()); diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 735ad93561e..b24c38c351d 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -569,7 +569,7 @@ fs_instruction_scheduler::calculate_deps() for (int i = 0; i < 3; i++) { if (inst->src[i].file == GRF) { if (post_reg_alloc) { - for (int r = 0; r < reg_width; r++) + for (int r = 0; r < reg_width * inst->regs_read(v, i); r++) add_dep(last_grf_write[inst->src[i].reg + r], n); } else { add_dep(last_grf_write[inst->src[i].reg], n); @@ -594,12 +594,14 @@ fs_instruction_scheduler::calculate_deps() } } - for (int i = 0; i < inst->mlen; i++) { - /* It looks like the MRF regs are released in the send - * instruction once it's sent, not when the result comes - * back. - */ - add_dep(last_mrf_write[inst->base_mrf + i], n); + if (inst->base_mrf != -1) { + for (int i = 0; i < inst->mlen; i++) { + /* It looks like the MRF regs are released in the send + * instruction once it's sent, not when the result comes + * back. 
+ */ + add_dep(last_mrf_write[inst->base_mrf + i], n); + } } if (inst->predicate) { @@ -642,7 +644,7 @@ fs_instruction_scheduler::calculate_deps() add_barrier_deps(n); } - if (inst->mlen > 0) { + if (inst->mlen > 0 && inst->base_mrf != -1) { for (int i = 0; i < v->implied_mrf_writes(inst); i++) { add_dep(last_mrf_write[inst->base_mrf + i], n); last_mrf_write[inst->base_mrf + i] = n; @@ -677,7 +679,7 @@ fs_instruction_scheduler::calculate_deps() for (int i = 0; i < 3; i++) { if (inst->src[i].file == GRF) { if (post_reg_alloc) { - for (int r = 0; r < reg_width; r++) + for (int r = 0; r < reg_width * inst->regs_read(v, i); r++) add_dep(n, last_grf_write[inst->src[i].reg + r]); } else { add_dep(n, last_grf_write[inst->src[i].reg]); @@ -702,12 +704,14 @@ fs_instruction_scheduler::calculate_deps() } } - for (int i = 0; i < inst->mlen; i++) { - /* It looks like the MRF regs are released in the send - * instruction once it's sent, not when the result comes - * back. - */ - add_dep(n, last_mrf_write[inst->base_mrf + i], 2); + if (inst->base_mrf != -1) { + for (int i = 0; i < inst->mlen; i++) { + /* It looks like the MRF regs are released in the send + * instruction once it's sent, not when the result comes + * back. + */ + add_dep(n, last_mrf_write[inst->base_mrf + i], 2); + } } if (inst->predicate) { @@ -749,7 +753,7 @@ fs_instruction_scheduler::calculate_deps() add_barrier_deps(n); } - if (inst->mlen > 0) { + if (inst->mlen > 0 && inst->base_mrf != -1) { for (int i = 0; i < v->implied_mrf_writes(inst); i++) { last_mrf_write[inst->base_mrf + i] = n; } |