diff options
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu.h | 15 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_emit.c | 96 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 73 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 15 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 163 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_emit.c | 8 |
6 files changed, 315 insertions, 55 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 8ffa7c760ac..0e3ccfa46c8 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -897,9 +897,11 @@ void brw_math2(struct brw_compile *p, struct brw_reg src0, struct brw_reg src1); -void brw_dp_READ_16( struct brw_compile *p, - struct brw_reg dest, - GLuint scratch_offset ); +void brw_oword_block_read(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + int num_regs, + GLuint offset); void brw_dp_READ_4( struct brw_compile *p, struct brw_reg dest, @@ -918,9 +920,10 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, GLuint offset, GLuint bind_table_index); -void brw_dp_WRITE_16( struct brw_compile *p, - struct brw_reg src, - GLuint scratch_offset ); +void brw_oword_block_write(struct brw_compile *p, + struct brw_reg mrf, + int num_regs, + GLuint offset); /* If/else/endif. Works by manipulating the execution flags on each * channel. diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 399b99c9607..63ab5c26f36 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1353,16 +1353,29 @@ void brw_math_16( struct brw_compile *p, /** - * Write block of 16 dwords/floats to the data port Render Cache scratch buffer. - * Scratch offset should be a multiple of 64. - * Used for register spilling. + * Write a block of OWORDs (half a GRF each) from the scratch buffer, + * using a constant offset per channel. + * + * The offset must be aligned to oword size (16 bytes). Used for + * register spilling. */ -void brw_dp_WRITE_16( struct brw_compile *p, - struct brw_reg src, - GLuint scratch_offset ) +void brw_oword_block_write(struct brw_compile *p, + struct brw_reg mrf, + int num_regs, + GLuint offset) { struct intel_context *intel = &p->brw->intel; - GLuint msg_reg_nr = 1; + uint32_t msg_control; + int mlen; + + if (num_regs == 1) { + msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; + mlen = 2; + } else { + msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; + mlen = 3; + } + { brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); @@ -1371,20 +1384,24 @@ void brw_dp_WRITE_16( struct brw_compile *p, /* set message header global offset field (reg 0, element 2) */ brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), - brw_imm_d(scratch_offset)); + brw_imm_d(offset)); brw_pop_insn_state(p); } { - GLuint msg_length = 3; struct brw_reg dest; struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); int send_commit_msg; + struct brw_reg src_header = retype(brw_vec8_grf(0, 0), + BRW_REGISTER_TYPE_UW); - insn->header.predicate_control = 0; /* XXX */ - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditionalmod = msg_reg_nr; + if (insn->header.compression_control != BRW_COMPRESSION_NONE) { + insn->header.compression_control = BRW_COMPRESSION_NONE; + src_header = vec16(src_header); + } + assert(insn->header.predicate_control == BRW_PREDICATE_NONE); + insn->header.destreg__conditionalmod = mrf.nr; /* Until gen6, writes followed by reads from the same location * are not guaranteed to be ordered unless write_commit is set. @@ -1400,19 +1417,19 @@ void brw_dp_WRITE_16( struct brw_compile *p, dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); send_commit_msg = 0; } else { - dest = brw_uw16_grf(0, 0); + dest = src_header; send_commit_msg = 1; } brw_set_dest(insn, dest); - brw_set_src0(insn, src); + brw_set_src0(insn, src_header); brw_set_dp_write_message(p->brw, insn, 255, /* binding table index (255=stateless) */ - BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ + msg_control, BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ - msg_length, + mlen, GL_TRUE, /* header_present */ 0, /* pixel scoreboard */ send_commit_msg, /* response_length */ @@ -1423,15 +1440,32 @@ void brw_dp_WRITE_16( struct brw_compile *p, /** - * Read block of 16 dwords/floats from the data port Render Cache scratch buffer. - * Scratch offset should be a multiple of 64. - * Used for register spilling. + * Read a block of owords (half a GRF each) from the scratch buffer + * using a constant index per channel. + * + * Offset must be aligned to oword size (16 bytes). Used for register + * spilling. */ -void brw_dp_READ_16( struct brw_compile *p, - struct brw_reg dest, - GLuint scratch_offset ) +void +brw_oword_block_read(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + int num_regs, + GLuint offset) { - GLuint msg_reg_nr = 1; + uint32_t msg_control; + int rlen; + + dest = retype(dest, BRW_REGISTER_TYPE_UW); + + if (num_regs == 1) { + msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; + rlen = 1; + } else { + msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; + rlen = 2; + } + { brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); @@ -1440,29 +1474,29 @@ void brw_dp_READ_16( struct brw_compile *p, /* set message header global offset field (reg 0, element 2) */ brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), - brw_imm_d(scratch_offset)); + brw_imm_d(offset)); brw_pop_insn_state(p); } { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - - insn->header.predicate_control = 0; /* XXX */ - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditionalmod = msg_reg_nr; - + + assert(insn->header.predicate_control == 0); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = mrf.nr; + brw_set_dest(insn, dest); /* UW? */ brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); brw_set_dp_read_message(p->brw, insn, 255, /* binding table index (255=stateless) */ - BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, + msg_control, BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 1, /* target cache (render/scratch) */ 1, /* msg_length */ - 2, /* response_length */ + rlen, 0); /* eot */ } } diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index bc39d1c29a1..f38976aa8a8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -179,10 +179,6 @@ type_size(const struct glsl_type *type) } } -static const fs_reg reg_undef; -static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F); -static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D); - int fs_visitor::virtual_grf_alloc(int size) { @@ -2227,6 +2223,47 @@ fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask) } void +fs_visitor::generate_spill(fs_inst *inst, struct brw_reg src) +{ + assert(inst->mlen != 0); + + brw_MOV(p, + retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD), + retype(src, BRW_REGISTER_TYPE_UD)); + brw_oword_block_write(p, brw_message_reg(inst->base_mrf), 1, inst->offset); +} + +void +fs_visitor::generate_unspill(fs_inst *inst, struct brw_reg dst) +{ + assert(inst->mlen != 0); + + /* Clear any post destination dependencies that would be ignored by + * the block read. See the B-Spec for pre-gen5 send instruction. + * + * This could use a better solution, since texture sampling and + * math reads could potentially run into it as well -- anywhere + * that we have a SEND with a destination that is a register that + * was written but not read within the last N instructions (what's + * N? unsure). This is rare because of dead code elimination, but + * not impossible. + */ + if (intel->gen == 4 && !intel->is_g4x) + brw_MOV(p, brw_null_reg(), dst); + + brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf), 1, + inst->offset); + + if (intel->gen == 4 && !intel->is_g4x) { + /* gen4 errata: destination from a send can't be used as a + * destination until it's been read. Just read it so we don't + * have to worry. + */ + brw_MOV(p, brw_null_reg(), dst); + } +} + +void fs_visitor::assign_curb_setup() { c->prog_data.first_curbe_grf = c->key.nr_payload_regs; @@ -3040,6 +3077,15 @@ fs_visitor::generate_code() case FS_OPCODE_DDY: generate_ddy(inst, dst, src[0]); break; + + case FS_OPCODE_SPILL: + generate_spill(inst, src[0]); + break; + + case FS_OPCODE_UNSPILL: + generate_unspill(inst, dst); + break; + case FS_OPCODE_FB_WRITE: generate_fb_write(inst); break; @@ -3145,10 +3191,25 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) progress = v.dead_code_eliminate() || progress; } while (progress); + if (0) { + /* Debug of register spilling: Go spill everything. */ + int virtual_grf_count = v.virtual_grf_next; + for (int i = 1; i < virtual_grf_count; i++) { + v.spill_reg(i); + } + v.calculate_live_intervals(); + } + if (0) v.assign_regs_trivial(); - else - v.assign_regs(); + else { + while (!v.assign_regs()) { + if (v.fail) + break; + + v.calculate_live_intervals(); + } + } } if (!v.fail) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index b8126083f9e..de7137a7db3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -74,6 +74,8 @@ enum fs_opcodes { FS_OPCODE_TXL, FS_OPCODE_DISCARD_NOT, FS_OPCODE_DISCARD_AND, + FS_OPCODE_SPILL, + FS_OPCODE_UNSPILL, }; @@ -196,6 +198,7 @@ public: this->shadow_compare = false; this->mlen = 0; this->base_mrf = 0; + this->offset = 0; } fs_inst() @@ -281,6 +284,7 @@ public: bool eot; bool header_present; bool shadow_compare; + uint32_t offset; /* spill/unspill offset */ /** @{ * Annotation for the generated IR. One of the two can be set. @@ -359,8 +363,10 @@ public: void assign_curb_setup(); void calculate_urb_setup(); void assign_urb_setup(); - void assign_regs(); + bool assign_regs(); void assign_regs_trivial(); + int choose_spill_reg(struct ra_graph *g); + void spill_reg(int spill_reg); void split_virtual_grfs(); void calculate_live_intervals(); bool propagate_constants(); @@ -378,6 +384,8 @@ public: void generate_discard_and(fs_inst *inst, struct brw_reg temp); void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src); void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src); + void generate_spill(fs_inst *inst, struct brw_reg src); + void generate_unspill(fs_inst *inst, struct brw_reg dst); void emit_dummy_fs(); fs_reg *emit_fragcoord_interpolation(ir_variable *ir); @@ -391,6 +399,7 @@ public: fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1); void emit_bool_to_cond_code(ir_rvalue *condition); void emit_if_gen6(ir_if *ir); + void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset); void emit_fb_writes(); void emit_assignment_writes(fs_reg &l, fs_reg &r, @@ -444,5 +453,9 @@ public: int grf_used; }; +static const fs_reg reg_undef; +static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F); +static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D); + GLboolean brw_do_channel_expressions(struct exec_list *instructions); GLboolean brw_do_vector_splitting(struct exec_list *instructions); diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 2dda8acb387..b5bfd00d5fe 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -84,7 +84,7 @@ fs_visitor::assign_regs_trivial() this->grf_used = last_grf + 1; } -void +bool fs_visitor::assign_regs() { int last_grf = 0; @@ -220,11 +220,22 @@ fs_visitor::assign_regs() } } - /* FINISHME: Handle spilling */ if (!ra_allocate_no_spills(g)) { - fprintf(stderr, "Failed to allocate registers.\n"); - this->fail = true; - return; + /* Failed to allocate registers. Spill a reg, and the caller will + * loop back into here to try again. + */ + int reg = choose_spill_reg(g); + if (reg == -1) { + this->fail = true; + } else { + spill_reg(reg); + } + + + talloc_free(g); + talloc_free(regs); + + return false; } /* Get the chosen virtual registers for each node, and map virtual @@ -262,4 +273,146 @@ fs_visitor::assign_regs() talloc_free(g); talloc_free(regs); + + return true; +} + +void +fs_visitor::emit_unspill(fs_inst *inst, fs_reg dst, uint32_t spill_offset) +{ + int size = virtual_grf_sizes[dst.reg]; + dst.reg_offset = 0; + + for (int chan = 0; chan < size; chan++) { + fs_inst *unspill_inst = new(mem_ctx) fs_inst(FS_OPCODE_UNSPILL, + dst); + dst.reg_offset++; + unspill_inst->offset = spill_offset + chan * REG_SIZE; + unspill_inst->ir = inst->ir; + unspill_inst->annotation = inst->annotation; + + /* Choose a MRF that won't conflict with an MRF that's live across the + * spill. Nothing else will make it up to MRF 14/15. + */ + unspill_inst->base_mrf = 14; + unspill_inst->mlen = 1; /* header contains offset */ + inst->insert_before(unspill_inst); + } +} + +int +fs_visitor::choose_spill_reg(struct ra_graph *g) +{ + float loop_scale = 1.0; + float spill_costs[this->virtual_grf_next]; + bool no_spill[this->virtual_grf_next]; + + for (int i = 0; i < this->virtual_grf_next; i++) { + spill_costs[i] = 0.0; + no_spill[i] = false; + } + + /* Calculate costs for spilling nodes. Call it a cost of 1 per + * spill/unspill we'll have to do, and guess that the insides of + * loops run 10 times. + */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + fs_inst *inst = (fs_inst *)iter.get(); + + for (unsigned int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF) { + int size = virtual_grf_sizes[inst->src[i].reg]; + spill_costs[inst->src[i].reg] += size * loop_scale; + } + } + + if (inst->dst.file == GRF) { + int size = virtual_grf_sizes[inst->dst.reg]; + spill_costs[inst->dst.reg] += size * loop_scale; + } + + switch (inst->opcode) { + + case BRW_OPCODE_DO: + loop_scale *= 10; + break; + + case BRW_OPCODE_WHILE: + loop_scale /= 10; + break; + + case FS_OPCODE_SPILL: + if (inst->src[0].file == GRF) + no_spill[inst->src[0].reg] = true; + break; + + case FS_OPCODE_UNSPILL: + if (inst->dst.file == GRF) + no_spill[inst->dst.reg] = true; + break; + } + } + + for (int i = 0; i < this->virtual_grf_next; i++) { + if (!no_spill[i]) + ra_set_node_spill_cost(g, i, spill_costs[i]); + } + + return ra_get_best_spill_node(g); +} + +void +fs_visitor::spill_reg(int spill_reg) +{ + int size = virtual_grf_sizes[spill_reg]; + unsigned int spill_offset = c->last_scratch; + assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */ + c->last_scratch += size * REG_SIZE; + + /* Generate spill/unspill instructions for the objects being + * spilled. Right now, we spill or unspill the whole thing to a + * virtual grf of the same size. For most instructions, though, we + * could just spill/unspill the GRF being accessed. + */ + foreach_iter(exec_list_iterator, iter, this->instructions) { + fs_inst *inst = (fs_inst *)iter.get(); + + for (unsigned int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF && + inst->src[i].reg == spill_reg) { + inst->src[i].reg = virtual_grf_alloc(size); + emit_unspill(inst, inst->src[i], spill_offset); + } + } + + if (inst->dst.file == GRF && + inst->dst.reg == spill_reg) { + inst->dst.reg = virtual_grf_alloc(size); + + /* Since we spill/unspill the whole thing even if we access + * just a component, we may need to unspill before the + * instruction we're spilling for. + */ + if (size != 1 || inst->predicated) { + emit_unspill(inst, inst->dst, spill_offset); + } + + fs_reg spill_src = inst->dst; + spill_src.reg_offset = 0; + spill_src.abs = false; + spill_src.negate = false; + + for (int chan = 0; chan < size; chan++) { + fs_inst *spill_inst = new(mem_ctx) fs_inst(FS_OPCODE_SPILL, + reg_null_f, spill_src); + spill_src.reg_offset++; + spill_inst->offset = spill_offset + chan * REG_SIZE; + spill_inst->ir = inst->ir; + spill_inst->annotation = inst->annotation; + spill_inst->base_mrf = 14; + spill_inst->mlen = 2; /* header, value */ + inst->insert_after(spill_inst); + } + } + } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index cb71c665b47..88bc64e5dd9 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1576,9 +1576,7 @@ static void emit_spill( struct brw_wm_compile *c, mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask } send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 } */ - brw_dp_WRITE_16(p, - retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW), - slot); + brw_oword_block_write(p, brw_message_reg(1), 2, slot); } @@ -1603,9 +1601,7 @@ static void emit_unspill( struct brw_wm_compile *c, send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 } */ - brw_dp_READ_16(p, - retype(vec16(reg), BRW_REGISTER_TYPE_UW), - slot); + brw_oword_block_read(p, vec16(reg), brw_message_reg(1), 2, slot); } |