diff options
Diffstat (limited to 'src/intel/compiler')
-rw-r--r-- | src/intel/compiler/brw_fs.h | 4 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_reg_allocate.cpp | 175 |
2 files changed, 103 insertions, 76 deletions
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index f05a9e0625a..098725b80d1 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -117,10 +117,6 @@ public: void assign_regs_trivial(); void calculate_payload_ranges(int payload_node_count, int *payload_last_use_ip); - void setup_payload_interference(struct ra_graph *g, int payload_reg_count, - int first_payload_node); - int choose_spill_reg(struct ra_graph *g); - void spill_reg(unsigned spill_reg); void split_virtual_grfs(); bool compact_virtual_grfs(); void assign_constant_locations(); diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp index 0462c77675c..14a110c326b 100644 --- a/src/intel/compiler/brw_fs_reg_allocate.cpp +++ b/src/intel/compiler/brw_fs_reg_allocate.cpp @@ -393,6 +393,44 @@ void fs_visitor::calculate_payload_ranges(int payload_node_count, } } +class fs_reg_alloc { +public: + fs_reg_alloc(fs_visitor *fs): + fs(fs), devinfo(fs->devinfo), compiler(fs->compiler), g(NULL) + { + mem_ctx = ralloc_context(NULL); + int reg_width = fs->dispatch_width / 8; + rsi = _mesa_logbase2(reg_width); + } + + ~fs_reg_alloc() + { + ralloc_free(mem_ctx); + } + + bool assign_regs(bool allow_spilling, bool spill_all); + +private: + void setup_payload_interference(int payload_node_count, + int first_payload_node); + void setup_mrf_hack_interference(int first_mrf_node, + int *first_used_mrf); + void build_interference_graph(); + + int choose_spill_reg(); + void spill_reg(unsigned spill_reg); + + void *mem_ctx; + fs_visitor *fs; + const gen_device_info *devinfo; + const brw_compiler *compiler; + + /* Which compiler->fs_reg_sets[] to use */ + int rsi; + + ra_graph *g; +}; + /** * Sets up interference between thread payload registers and the virtual GRFs @@ -412,12 +450,11 @@ void fs_visitor::calculate_payload_ranges(int payload_node_count, * (note that in SIMD16, a node is two registers). */ void -fs_visitor::setup_payload_interference(struct ra_graph *g, - int payload_node_count, - int first_payload_node) +fs_reg_alloc::setup_payload_interference(int payload_node_count, + int first_payload_node) { int payload_last_use_ip[payload_node_count]; - calculate_payload_ranges(payload_node_count, payload_last_use_ip); + fs->calculate_payload_ranges(payload_node_count, payload_last_use_ip); for (int i = 0; i < payload_node_count; i++) { if (payload_last_use_ip[i] == -1) @@ -427,12 +464,12 @@ fs_visitor::setup_payload_interference(struct ra_graph *g, * live between the start of the program and our last use of the payload * node. */ - for (unsigned j = 0; j < this->alloc.count; j++) { + for (unsigned j = 0; j < fs->alloc.count; j++) { /* Note that we use a <= comparison, unlike virtual_grf_interferes(), * in order to not have to worry about the uniform issue described in * calculate_live_intervals(). */ - if (this->virtual_grf_start[j] <= payload_last_use_ip[i]) { + if (fs->virtual_grf_start[j] <= payload_last_use_ip[i]) { ra_add_node_interference(g, first_payload_node + i, j); } } @@ -444,7 +481,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g, * The alternative would be to have per-physical-register classes, which * would just be silly. */ - if (devinfo->gen <= 5 && dispatch_width >= 16) { + if (devinfo->gen <= 5 && fs->dispatch_width >= 16) { /* We have to divide by 2 here because we only have even numbered * registers. Some of the payload registers will be odd, but * that's ok because their physical register numbers have already @@ -497,15 +534,15 @@ get_used_mrfs(fs_visitor *v, bool *mrf_used) * Sets interference between virtual GRFs and usage of the high GRFs for SEND * messages (treated as MRFs in code generation). */ -static void -setup_mrf_hack_interference(fs_visitor *v, struct ra_graph *g, - int first_mrf_node, int *first_used_mrf) +void +fs_reg_alloc::setup_mrf_hack_interference(int first_mrf_node, + int *first_used_mrf) { - bool mrf_used[BRW_MAX_MRF(v->devinfo->gen)]; - get_used_mrfs(v, mrf_used); + bool mrf_used[BRW_MAX_MRF(fs->devinfo->gen)]; + get_used_mrfs(fs, mrf_used); - *first_used_mrf = BRW_MAX_MRF(v->devinfo->gen); - for (int i = 0; i < BRW_MAX_MRF(v->devinfo->gen); i++) { + *first_used_mrf = BRW_MAX_MRF(devinfo->gen); + for (int i = 0; i < BRW_MAX_MRF(devinfo->gen); i++) { /* Mark each MRF reg node as being allocated to its physical register. * * The alternative would be to have per-physical-register classes, which @@ -520,15 +557,15 @@ setup_mrf_hack_interference(fs_visitor *v, struct ra_graph *g, if (i < *first_used_mrf) *first_used_mrf = i; - for (unsigned j = 0; j < v->alloc.count; j++) { + for (unsigned j = 0; j < fs->alloc.count; j++) { ra_add_node_interference(g, first_mrf_node + i, j); } } } } -static ra_graph * -build_interference_graph(fs_visitor *fs) +void +fs_reg_alloc::build_interference_graph() { const gen_device_info *devinfo = fs->devinfo; const brw_compiler *compiler = fs->compiler; @@ -541,7 +578,7 @@ build_interference_graph(fs_visitor *fs) */ int reg_width = fs->dispatch_width / 8; int payload_node_count = ALIGN(fs->first_non_payload_grf, reg_width); - int rsi = _mesa_logbase2(reg_width); /* Which compiler->fs_reg_sets[] to use */ + fs->calculate_live_intervals(); int node_count = fs->alloc.count; @@ -553,8 +590,10 @@ build_interference_graph(fs_visitor *fs) int grf127_send_hack_node = node_count; if (devinfo->gen >= 8) node_count ++; - struct ra_graph *g = - ra_alloc_interference_graph(compiler->fs_reg_sets[rsi].regs, node_count); + + assert(g == NULL); + g = ra_alloc_interference_graph(compiler->fs_reg_sets[rsi].regs, node_count); + ralloc_steal(mem_ctx, g); for (unsigned i = 0; i < fs->alloc.count; i++) { unsigned size = fs->alloc.sizes[i]; @@ -601,10 +640,10 @@ build_interference_graph(fs_visitor *fs) } } - fs->setup_payload_interference(g, payload_node_count, first_payload_node); + setup_payload_interference(payload_node_count, first_payload_node); if (devinfo->gen >= 7) { int first_used_mrf = BRW_MAX_MRF(devinfo->gen); - setup_mrf_hack_interference(fs, g, first_mrf_hack_node, + setup_mrf_hack_interference(first_mrf_hack_node, &first_used_mrf); foreach_block_and_inst(block, fs_inst, inst, fs->cfg) { @@ -717,8 +756,6 @@ build_interference_graph(fs_visitor *fs) inst->src[3].nr); } } - - return g; } namespace { @@ -816,13 +853,13 @@ emit_spill(const fs_builder &bld, fs_reg src, } int -fs_visitor::choose_spill_reg(struct ra_graph *g) +fs_reg_alloc::choose_spill_reg() { float block_scale = 1.0; - float spill_costs[this->alloc.count]; - bool no_spill[this->alloc.count]; + float spill_costs[fs->alloc.count]; + bool no_spill[fs->alloc.count]; - for (unsigned i = 0; i < this->alloc.count; i++) { + for (unsigned i = 0; i < fs->alloc.count; i++) { spill_costs[i] = 0.0; no_spill[i] = false; } @@ -831,7 +868,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) * spill/unspill we'll have to do, and guess that the insides of * loops run 10 times. */ - foreach_block_and_inst(block, fs_inst, inst, cfg) { + foreach_block_and_inst(block, fs_inst, inst, fs->cfg) { for (unsigned int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) spill_costs[inst->src[i].nr] += regs_read(inst, i) * block_scale; @@ -875,8 +912,8 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) } } - for (unsigned i = 0; i < this->alloc.count; i++) { - int live_length = virtual_grf_end[i] - virtual_grf_start[i]; + for (unsigned i = 0; i < fs->alloc.count; i++) { + int live_length = fs->virtual_grf_end[i] - fs->virtual_grf_start[i]; if (live_length <= 0) continue; @@ -896,10 +933,10 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) } void -fs_visitor::spill_reg(unsigned spill_reg) +fs_reg_alloc::spill_reg(unsigned spill_reg) { - int size = alloc.sizes[spill_reg]; - unsigned int spill_offset = last_scratch; + int size = fs->alloc.sizes[spill_reg]; + unsigned int spill_offset = fs->last_scratch; assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */ /* Spills may use MRFs 13-15 in the SIMD16 case. Our texturing is done @@ -909,29 +946,29 @@ fs_visitor::spill_reg(unsigned spill_reg) * depth), starting from m1. In summary: We may not be able to spill in * SIMD16 mode, because we'd stomp the FB writes. */ - if (!spilled_any_registers) { + if (!fs->spilled_any_registers) { bool mrf_used[BRW_MAX_MRF(devinfo->gen)]; - get_used_mrfs(this, mrf_used); + get_used_mrfs(fs, mrf_used); - for (int i = spill_base_mrf(this); i < BRW_MAX_MRF(devinfo->gen); i++) { + for (int i = spill_base_mrf(fs); i < BRW_MAX_MRF(devinfo->gen); i++) { if (mrf_used[i]) { - fail("Register spilling not supported with m%d used", i); + fs->fail("Register spilling not supported with m%d used", i); return; } } - spilled_any_registers = true; + fs->spilled_any_registers = true; } - last_scratch += size * REG_SIZE; + fs->last_scratch += size * REG_SIZE; /* Generate spill/unspill instructions for the objects being * spilled. Right now, we spill or unspill the whole thing to a * virtual grf of the same size. For most instructions, though, we * could just spill/unspill the GRF being accessed. */ - foreach_block_and_inst (block, fs_inst, inst, cfg) { - const fs_builder ibld = fs_builder(this, block, inst); + foreach_block_and_inst (block, fs_inst, inst, fs->cfg) { + const fs_builder ibld = fs_builder(fs, block, inst); for (unsigned int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF && @@ -939,7 +976,7 @@ fs_visitor::spill_reg(unsigned spill_reg) int count = regs_read(inst, i); int subset_spill_offset = spill_offset + ROUND_DOWN_TO(inst->src[i].offset, REG_SIZE); - fs_reg unspill_dst(VGRF, alloc.allocate(count)); + fs_reg unspill_dst(VGRF, fs->alloc.allocate(count)); inst->src[i].nr = unspill_dst.nr; inst->src[i].offset %= REG_SIZE; @@ -967,7 +1004,7 @@ fs_visitor::spill_reg(unsigned spill_reg) inst->dst.nr == spill_reg) { int subset_spill_offset = spill_offset + ROUND_DOWN_TO(inst->dst.offset, REG_SIZE); - fs_reg spill_src(VGRF, alloc.allocate(regs_written(inst))); + fs_reg spill_src(VGRF, fs->alloc.allocate(regs_written(inst))); inst->dst.nr = spill_src.nr; inst->dst.offset %= REG_SIZE; @@ -989,7 +1026,7 @@ fs_visitor::spill_reg(unsigned spill_reg) */ const unsigned width = 8 * MIN2( DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE), - spill_max_size(this)); + spill_max_size(fs)); /* Spills should only write data initialized by the instruction for * whichever channels are enabled in the excution mask. If that's @@ -1020,29 +1057,20 @@ fs_visitor::spill_reg(unsigned spill_reg) } } - invalidate_live_intervals(); + fs->invalidate_live_intervals(); } bool -fs_visitor::assign_regs(bool allow_spilling, bool spill_all) +fs_reg_alloc::assign_regs(bool allow_spilling, bool spill_all) { - /* Most of this allocation was written for a reg_width of 1 - * (dispatch_width == 8). In extending to SIMD16, the code was - * left in place and it was converted to have the hardware - * registers it's allocating be contiguous physical pairs of regs - * for reg_width == 2. - */ - int reg_width = dispatch_width / 8; - int rsi = _mesa_logbase2(reg_width); /* Which compiler->fs_reg_sets[] to use */ - ra_graph *g = build_interference_graph(this); + build_interference_graph(); /* Debug of register spilling: Go spill everything. */ if (unlikely(spill_all)) { - int reg = choose_spill_reg(g); + int reg = choose_spill_reg(); if (reg != -1) { spill_reg(reg); - ralloc_free(g); return false; } } @@ -1051,17 +1079,15 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all) /* Failed to allocate registers. Spill a reg, and the caller will * loop back into here to try again. */ - int reg = choose_spill_reg(g); + int reg = choose_spill_reg(); if (reg == -1) { - fail("no register to spill:\n"); - dump_instructions(NULL); + fs->fail("no register to spill:\n"); + fs->dump_instructions(NULL); } else if (allow_spilling) { spill_reg(reg); } - ralloc_free(g); - return false; } @@ -1069,26 +1095,31 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all) * regs in the register classes back down to real hardware reg * numbers. */ - unsigned hw_reg_mapping[alloc.count]; - this->grf_used = this->first_non_payload_grf; - for (unsigned i = 0; i < this->alloc.count; i++) { + unsigned hw_reg_mapping[fs->alloc.count]; + fs->grf_used = fs->first_non_payload_grf; + for (unsigned i = 0; i < fs->alloc.count; i++) { int reg = ra_get_node_reg(g, i); hw_reg_mapping[i] = compiler->fs_reg_sets[rsi].ra_reg_to_grf[reg]; - this->grf_used = MAX2(this->grf_used, - hw_reg_mapping[i] + this->alloc.sizes[i]); + fs->grf_used = MAX2(fs->grf_used, + hw_reg_mapping[i] + fs->alloc.sizes[i]); } - foreach_block_and_inst(block, fs_inst, inst, cfg) { + foreach_block_and_inst(block, fs_inst, inst, fs->cfg) { assign_reg(hw_reg_mapping, &inst->dst); for (int i = 0; i < inst->sources; i++) { assign_reg(hw_reg_mapping, &inst->src[i]); } } - this->alloc.count = this->grf_used; - - ralloc_free(g); + fs->alloc.count = fs->grf_used; return true; } + +bool +fs_visitor::assign_regs(bool allow_spilling, bool spill_all) +{ + fs_reg_alloc alloc(this); + return alloc.assign_regs(allow_spilling, spill_all); +} |