diff options
18 files changed, 235 insertions, 201 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 200a494c4c9..45870356f93 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -424,7 +424,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_reg &dst, assert(dst.width % 8 == 0); int regs_written = 4 * (dst.width / 8) * scale; - fs_reg vec4_result = fs_reg(GRF, virtual_grf_alloc(regs_written), + fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written), dst.type, dst.width); inst = new(mem_ctx) fs_inst(op, vec4_result, surf_index, vec4_offset); inst->regs_written = regs_written; @@ -688,7 +688,7 @@ fs_visitor::get_timestamp() 0), BRW_REGISTER_TYPE_UD)); - fs_reg dst = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD, 4); + fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 4); fs_inst *mov = emit(MOV(dst, ts)); /* We want to read the 3 fields we care about even if it's not enabled in @@ -764,7 +764,7 @@ fs_visitor::emit_shader_time_end() fs_reg start = shader_start_time; start.negate = true; - fs_reg diff = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD, 1); + fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 1); emit(ADD(diff, start, shader_end_time)); /* If there were no instructions between the two timestamp gets, the diff @@ -1029,26 +1029,11 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) } } -int -fs_visitor::virtual_grf_alloc(int size) -{ - if (virtual_grf_array_size <= virtual_grf_count) { - if (virtual_grf_array_size == 0) - virtual_grf_array_size = 16; - else - virtual_grf_array_size *= 2; - virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, - virtual_grf_array_size); - } - virtual_grf_sizes[virtual_grf_count] = size; - return virtual_grf_count++; -} - fs_reg fs_visitor::vgrf(const glsl_type *const type) { int reg_width = dispatch_width / 8; - return fs_reg(GRF, virtual_grf_alloc(type_size(type) * reg_width), + return fs_reg(GRF, alloc.allocate(type_size(type) * reg_width), brw_type_for_base_type(type), dispatch_width); } @@ -1056,7 +1041,7 @@ fs_reg fs_visitor::vgrf(int num_components) { int reg_width = dispatch_width / 8; - return fs_reg(GRF, virtual_grf_alloc(num_components * reg_width), + return fs_reg(GRF, alloc.allocate(num_components * reg_width), BRW_REGISTER_TYPE_F, dispatch_width); } @@ -1912,14 +1897,14 @@ fs_visitor::assign_vs_urb_setup() void fs_visitor::split_virtual_grfs() { - int num_vars = this->virtual_grf_count; + int num_vars = this->alloc.count; /* Count the total number of registers */ int reg_count = 0; int vgrf_to_reg[num_vars]; for (int i = 0; i < num_vars; i++) { vgrf_to_reg[i] = reg_count; - reg_count += virtual_grf_sizes[i]; + reg_count += alloc.sizes[i]; } /* An array of "split points". For each register slot, this indicates @@ -1935,14 +1920,14 @@ fs_visitor::split_virtual_grfs() foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->dst.file == GRF) { int reg = vgrf_to_reg[inst->dst.reg]; - for (int j = 1; j < this->virtual_grf_sizes[inst->dst.reg]; j++) + for (unsigned j = 1; j < this->alloc.sizes[inst->dst.reg]; j++) split_points[reg + j] = true; } for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == GRF) { int reg = vgrf_to_reg[inst->src[i].reg]; - for (int j = 1; j < this->virtual_grf_sizes[inst->src[i].reg]; j++) + for (unsigned j = 1; j < this->alloc.sizes[inst->src[i].reg]; j++) split_points[reg + j] = true; } } @@ -1988,13 +1973,13 @@ fs_visitor::split_virtual_grfs() int offset = 1; /* j > 0 case */ - for (int j = 1; j < virtual_grf_sizes[i]; j++) { + for (unsigned j = 1; j < alloc.sizes[i]; j++) { /* If this is a split point, reset the offset to 0 and allocate a * new virtual GRF for the previous offset many registers */ if (split_points[reg]) { assert(offset <= MAX_VGRF_SIZE); - int grf = virtual_grf_alloc(offset); + int grf = alloc.allocate(offset); for (int k = reg - offset; k < reg; k++) new_virtual_grf[k] = grf; offset = 0; @@ -2006,7 +1991,7 @@ fs_visitor::split_virtual_grfs() /* The last one gets the original register number */ assert(offset <= MAX_VGRF_SIZE); - virtual_grf_sizes[i] = offset; + alloc.sizes[i] = offset; for (int k = reg - offset; k < reg; k++) new_virtual_grf[k] = i; } @@ -2017,14 +2002,14 @@ fs_visitor::split_virtual_grfs() reg = vgrf_to_reg[inst->dst.reg] + inst->dst.reg_offset; inst->dst.reg = new_virtual_grf[reg]; inst->dst.reg_offset = new_reg_offset[reg]; - assert(new_reg_offset[reg] < virtual_grf_sizes[new_virtual_grf[reg]]); + assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]); } for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == GRF) { reg = vgrf_to_reg[inst->src[i].reg] + inst->src[i].reg_offset; inst->src[i].reg = new_virtual_grf[reg]; inst->src[i].reg_offset = new_reg_offset[reg]; - assert(new_reg_offset[reg] < virtual_grf_sizes[new_virtual_grf[reg]]); + assert((unsigned)new_reg_offset[reg] < alloc.sizes[new_virtual_grf[reg]]); } } } @@ -2044,7 +2029,7 @@ bool fs_visitor::compact_virtual_grfs() { bool progress = false; - int remap_table[this->virtual_grf_count]; + int remap_table[this->alloc.count]; memset(remap_table, -1, sizeof(remap_table)); /* Mark which virtual GRFs are used. */ @@ -2060,7 +2045,7 @@ fs_visitor::compact_virtual_grfs() /* Compact the GRF arrays. */ int new_index = 0; - for (int i = 0; i < this->virtual_grf_count; i++) { + for (unsigned i = 0; i < this->alloc.count; i++) { if (remap_table[i] == -1) { /* We just found an unused register. This means that we are * actually going to compact something. @@ -2068,13 +2053,13 @@ fs_visitor::compact_virtual_grfs() progress = true; } else { remap_table[i] = new_index; - virtual_grf_sizes[new_index] = virtual_grf_sizes[i]; + alloc.sizes[new_index] = alloc.sizes[i]; invalidate_live_intervals(); ++new_index; } } - this->virtual_grf_count = new_index; + this->alloc.count = new_index; /* Patch all the instructions to use the newly renumbered registers */ foreach_block_and_inst(block, fs_inst, inst, cfg) { @@ -2458,8 +2443,8 @@ fs_visitor::opt_register_renaming() bool progress = false; int depth = 0; - int remap[virtual_grf_count]; - memset(remap, -1, sizeof(int) * virtual_grf_count); + int remap[alloc.count]; + memset(remap, -1, sizeof(int) * alloc.count); foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->opcode == BRW_OPCODE_IF || inst->opcode == BRW_OPCODE_DO) { @@ -2483,12 +2468,12 @@ fs_visitor::opt_register_renaming() if (depth == 0 && inst->dst.file == GRF && - virtual_grf_sizes[inst->dst.reg] == inst->dst.width / 8 && + alloc.sizes[inst->dst.reg] == inst->dst.width / 8 && !inst->is_partial_write()) { if (remap[dst] == -1) { remap[dst] = dst; } else { - remap[dst] = virtual_grf_alloc(inst->dst.width / 8); + remap[dst] = alloc.allocate(inst->dst.width / 8); inst->dst.reg = remap[dst]; progress = true; } @@ -3030,7 +3015,7 @@ fs_visitor::lower_uniform_pull_constant_loads() */ if (brw->gen >= 9) { payload.reg_offset++; - virtual_grf_sizes[payload.reg] = 2; + alloc.sizes[payload.reg] = 2; } /* This is actually going to be a MOV, but since only the first dword @@ -3071,11 +3056,11 @@ fs_visitor::lower_load_payload() { bool progress = false; - int vgrf_to_reg[virtual_grf_count]; + int vgrf_to_reg[alloc.count]; int reg_count = 16; /* Leave room for MRF */ - for (int i = 0; i < virtual_grf_count; ++i) { + for (unsigned i = 0; i < alloc.count; ++i) { vgrf_to_reg[i] = reg_count; - reg_count += virtual_grf_sizes[i]; + reg_count += alloc.sizes[i]; } struct { @@ -3239,7 +3224,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fprintf(file, "vgrf%d", inst->dst.reg); if (inst->dst.width != dispatch_width) fprintf(file, "@%d", inst->dst.width); - if (virtual_grf_sizes[inst->dst.reg] != inst->dst.width / 8 || + if (alloc.sizes[inst->dst.reg] != inst->dst.width / 8 || inst->dst.subreg_offset) fprintf(file, "+%d.%d", inst->dst.reg_offset, inst->dst.subreg_offset); @@ -3299,7 +3284,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fprintf(file, "vgrf%d", inst->src[i].reg); if (inst->src[i].width != dispatch_width) fprintf(file, "@%d", inst->src[i].width); - if (virtual_grf_sizes[inst->src[i].reg] != inst->src[i].width / 8 || + if (alloc.sizes[inst->src[i].reg] != inst->src[i].width / 8 || inst->src[i].subreg_offset) fprintf(file, "+%d.%d", inst->src[i].reg_offset, inst->src[i].subreg_offset); @@ -3550,9 +3535,9 @@ fs_visitor::calculate_register_pressure() regs_live_at_ip = rzalloc_array(mem_ctx, int, num_instructions); - for (int reg = 0; reg < virtual_grf_count; reg++) { + for (unsigned reg = 0; reg < alloc.count; reg++) { for (int ip = virtual_grf_start[reg]; ip <= virtual_grf_end[reg]; ip++) - regs_live_at_ip[ip] += virtual_grf_sizes[reg]; + regs_live_at_ip[ip] += alloc.sizes[reg]; } } @@ -3640,7 +3625,7 @@ fs_visitor::fixup_3src_null_dest() { foreach_block_and_inst_safe (block, fs_inst, inst, cfg) { if (inst->is_3src() && inst->dst.is_null()) { - inst->dst = fs_reg(GRF, virtual_grf_alloc(dispatch_width / 8), + inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8), inst->dst.type); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index b95e2c03e13..2c9b705c07b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -321,7 +321,6 @@ public: void init(); fs_reg *variable_storage(ir_variable *var); - int virtual_grf_alloc(int size); fs_reg vgrf(const glsl_type *const type); fs_reg vgrf(int num_components); void import_uniforms(fs_visitor *v); @@ -643,9 +642,6 @@ public: int *param_size; - int *virtual_grf_sizes; - int virtual_grf_count; - int virtual_grf_array_size; int *virtual_grf_start; int *virtual_grf_end; brw::fs_live_variables *live_intervals; @@ -679,7 +675,7 @@ public: bool do_dual_src; int first_non_payload_grf; /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */ - int max_grf; + unsigned max_grf; fs_reg *fp_temp_regs; fs_reg *fp_input_regs; @@ -730,7 +726,7 @@ public: fs_reg shader_start_time; fs_reg userplane[MAX_CLIP_PLANES]; - int grf_used; + unsigned grf_used; bool spilled_any_registers; const unsigned dispatch_width; /**< 8 or 16 */ diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 11cb327614c..ae069bb757a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -224,7 +224,7 @@ fs_visitor::opt_cse_local(bblock_t *block) assert(written % dst_width == 0); fs_reg orig_dst = entry->generator->dst; - fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written), + fs_reg tmp = fs_reg(GRF, alloc.allocate(written), orig_dst.type, orig_dst.width); entry->tmp = tmp; entry->generator->dst = tmp; diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp index 189a119025d..968219bc074 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp @@ -278,17 +278,17 @@ fs_live_variables::fs_live_variables(fs_visitor *v, const cfg_t *cfg) { mem_ctx = ralloc_context(NULL); - num_vgrfs = v->virtual_grf_count; + num_vgrfs = v->alloc.count; num_vars = 0; var_from_vgrf = rzalloc_array(mem_ctx, int, num_vgrfs); for (int i = 0; i < num_vgrfs; i++) { var_from_vgrf[i] = num_vars; - num_vars += v->virtual_grf_sizes[i]; + num_vars += v->alloc.sizes[i]; } vgrf_from_var = rzalloc_array(mem_ctx, int, num_vars); for (int i = 0; i < num_vgrfs; i++) { - for (int j = 0; j < v->virtual_grf_sizes[i]; j++) { + for (unsigned j = 0; j < v->alloc.sizes[i]; j++) { vgrf_from_var[var_from_vgrf[i] + j] = i; } } @@ -344,7 +344,7 @@ fs_visitor::calculate_live_intervals() if (this->live_intervals) return; - int num_vgrfs = this->virtual_grf_count; + int num_vgrfs = this->alloc.count; ralloc_free(this->virtual_grf_start); ralloc_free(this->virtual_grf_end); virtual_grf_start = ralloc_array(mem_ctx, int, num_vgrfs); diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h index a52f922d959..a9b61aa39e5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.h +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.h @@ -77,7 +77,7 @@ public: /** * Map from any index in block_data to the virtual GRF containing it. * - * For virtual_grf_sizes of [1, 2, 3], vgrf_from_var would contain + * For alloc.sizes of [1, 2, 3], vgrf_from_var would contain * [0, 1, 1, 2, 2, 2]. */ int *vgrf_from_var; diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index bcd657b2172..ebe0b12b098 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -31,7 +31,7 @@ #include "glsl/ir_optimization.h" static void -assign_reg(int *reg_hw_locations, fs_reg *reg) +assign_reg(unsigned *reg_hw_locations, fs_reg *reg) { if (reg->file == GRF) { assert(reg->reg_offset >= 0); @@ -43,17 +43,17 @@ assign_reg(int *reg_hw_locations, fs_reg *reg) void fs_visitor::assign_regs_trivial() { - int hw_reg_mapping[this->virtual_grf_count + 1]; - int i; + unsigned hw_reg_mapping[this->alloc.count + 1]; + unsigned i; int reg_width = dispatch_width / 8; /* Note that compressed instructions require alignment to 2 registers. */ hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width); - for (i = 1; i <= this->virtual_grf_count; i++) { + for (i = 1; i <= this->alloc.count; i++) { hw_reg_mapping[i] = (hw_reg_mapping[i - 1] + - this->virtual_grf_sizes[i - 1]); + this->alloc.sizes[i - 1]); } - this->grf_used = hw_reg_mapping[this->virtual_grf_count]; + this->grf_used = hw_reg_mapping[this->alloc.count]; foreach_block_and_inst(block, fs_inst, inst, cfg) { assign_reg(hw_reg_mapping, &inst->dst); @@ -66,7 +66,7 @@ fs_visitor::assign_regs_trivial() fail("Ran out of regs on trivial allocator (%d/%d)\n", this->grf_used, max_grf); } else { - this->virtual_grf_count = this->grf_used; + this->alloc.count = this->grf_used; } } @@ -427,7 +427,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g, * live between the start of the program and our last use of the payload * node. */ - for (int j = 0; j < this->virtual_grf_count; j++) { + for (unsigned j = 0; j < this->alloc.count; j++) { /* Note that we use a <= comparison, unlike virtual_grf_interferes(), * in order to not have to worry about the uniform issue described in * calculate_live_intervals(). @@ -515,7 +515,7 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node) * that are used as conflicting with all virtual GRFs. */ if (mrf_used[i]) { - for (int j = 0; j < this->virtual_grf_count; j++) { + for (unsigned j = 0; j < this->alloc.count; j++) { ra_add_node_interference(g, first_mrf_node + i, j); } } @@ -533,12 +533,12 @@ fs_visitor::assign_regs(bool allow_spilling) * for reg_width == 2. */ int reg_width = dispatch_width / 8; - int hw_reg_mapping[this->virtual_grf_count]; + unsigned hw_reg_mapping[this->alloc.count]; int payload_node_count = ALIGN(this->first_non_payload_grf, reg_width); int rsi = reg_width - 1; /* Which screen->wm_reg_sets[] to use */ calculate_live_intervals(); - int node_count = this->virtual_grf_count; + int node_count = this->alloc.count; int first_payload_node = node_count; node_count += payload_node_count; int first_mrf_hack_node = node_count; @@ -547,8 +547,8 @@ fs_visitor::assign_regs(bool allow_spilling) struct ra_graph *g = ra_alloc_interference_graph(screen->wm_reg_sets[rsi].regs, node_count); - for (int i = 0; i < this->virtual_grf_count; i++) { - unsigned size = this->virtual_grf_sizes[i]; + for (unsigned i = 0; i < this->alloc.count; i++) { + unsigned size = this->alloc.sizes[i]; int c; assert(size <= ARRAY_SIZE(screen->wm_reg_sets[rsi].classes) && @@ -572,7 +572,7 @@ fs_visitor::assign_regs(bool allow_spilling) ra_set_node_class(g, i, c); - for (int j = 0; j < i; j++) { + for (unsigned j = 0; j < i; j++) { if (virtual_grf_interferes(i, j)) { ra_add_node_interference(g, i, j); } @@ -595,7 +595,7 @@ fs_visitor::assign_regs(bool allow_spilling) * highest register that works. */ if (inst->eot) { - int size = virtual_grf_sizes[inst->src[0].reg]; + int size = alloc.sizes[inst->src[0].reg]; int reg = screen->wm_reg_sets[rsi].class_to_ra_reg_range[size] - 1; ra_set_node_reg(g, inst->src[0].reg, reg); break; @@ -661,12 +661,12 @@ fs_visitor::assign_regs(bool allow_spilling) * numbers. */ this->grf_used = payload_node_count; - for (int i = 0; i < this->virtual_grf_count; i++) { + for (unsigned i = 0; i < this->alloc.count; i++) { int reg = ra_get_node_reg(g, i); hw_reg_mapping[i] = screen->wm_reg_sets[rsi].ra_reg_to_grf[reg]; this->grf_used = MAX2(this->grf_used, - hw_reg_mapping[i] + this->virtual_grf_sizes[i]); + hw_reg_mapping[i] + this->alloc.sizes[i]); } foreach_block_and_inst(block, fs_inst, inst, cfg) { @@ -676,7 +676,7 @@ fs_visitor::assign_regs(bool allow_spilling) } } - this->virtual_grf_count = this->grf_used; + this->alloc.count = this->grf_used; ralloc_free(g); @@ -747,10 +747,10 @@ int fs_visitor::choose_spill_reg(struct ra_graph *g) { float loop_scale = 1.0; - float spill_costs[this->virtual_grf_count]; - bool no_spill[this->virtual_grf_count]; + float spill_costs[this->alloc.count]; + bool no_spill[this->alloc.count]; - for (int i = 0; i < this->virtual_grf_count; i++) { + for (unsigned i = 0; i < this->alloc.count; i++) { spill_costs[i] = 0.0; no_spill[i] = false; } @@ -811,7 +811,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) } } - for (int i = 0; i < this->virtual_grf_count; i++) { + for (unsigned i = 0; i < this->alloc.count; i++) { if (!no_spill[i]) ra_set_node_spill_cost(g, i, spill_costs[i]); } @@ -822,7 +822,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) void fs_visitor::spill_reg(int spill_reg) { - int size = virtual_grf_sizes[spill_reg]; + int size = alloc.sizes[spill_reg]; unsigned int spill_offset = last_scratch; assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */ int spill_base_mrf = dispatch_width > 8 ? 13 : 14; @@ -862,7 +862,7 @@ fs_visitor::spill_reg(int spill_reg) int regs_read = inst->regs_read(this, i); int subset_spill_offset = (spill_offset + REG_SIZE * inst->src[i].reg_offset); - fs_reg unspill_dst(GRF, virtual_grf_alloc(regs_read)); + fs_reg unspill_dst(GRF, alloc.allocate(regs_read)); inst->src[i].reg = unspill_dst.reg; inst->src[i].reg_offset = 0; @@ -876,7 +876,7 @@ fs_visitor::spill_reg(int spill_reg) inst->dst.reg == spill_reg) { int subset_spill_offset = (spill_offset + REG_SIZE * inst->dst.reg_offset); - fs_reg spill_src(GRF, virtual_grf_alloc(inst->regs_written)); + fs_reg spill_src(GRF, alloc.allocate(inst->regs_written)); inst->dst.reg = spill_src.reg; inst->dst.reg_offset = 0; diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp index 62788cd3091..09f0faddbdb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp @@ -66,7 +66,7 @@ is_nop_mov(const fs_inst *inst) static bool is_copy_payload(const fs_visitor *v, const fs_inst *inst) { - if (v->virtual_grf_sizes[inst->src[0].reg] != inst->regs_written) + if (v->alloc.sizes[inst->src[0].reg] != inst->regs_written) return false; fs_reg reg = inst->src[0]; @@ -94,8 +94,8 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst) return false; } - if (v->virtual_grf_sizes[inst->src[0].reg] > - v->virtual_grf_sizes[inst->dst.reg]) + if (v->alloc.sizes[inst->src[0].reg] > + v->alloc.sizes[inst->dst.reg]) return false; if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) { @@ -179,7 +179,7 @@ fs_visitor::register_coalesce() if (reg_from != inst->src[0].reg) { reg_from = inst->src[0].reg; - src_size = virtual_grf_sizes[inst->src[0].reg]; + src_size = alloc.sizes[inst->src[0].reg]; assert(src_size <= MAX_VGRF_SIZE); assert(inst->src[0].width % 8 == 0); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 6cddcf5e7e9..2a36d942838 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1278,7 +1278,7 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, /* If last_rhs_inst wrote a different number of components than our LHS, * we can't safely rewrite it. */ - if (virtual_grf_sizes[dst.reg] != modify->regs_written) + if (alloc.sizes[dst.reg] != modify->regs_written) return false; /* Success! Rewrite the instruction. */ @@ -1461,7 +1461,7 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst, * this weirdness around to the expected layout. */ orig_dst = dst; - dst = fs_reg(GRF, virtual_grf_alloc(8), orig_dst.type); + dst = fs_reg(GRF, alloc.allocate(8), orig_dst.type); } enum opcode opcode; @@ -1672,7 +1672,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, * need to offset the Sampler State Pointer in the header. */ header_present = true; - sources[0] = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD); + sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); length++; } @@ -1814,7 +1814,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, else mlen = length * reg_width; - fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(mlen), + fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), BRW_REGISTER_TYPE_F); emit(LOAD_PAYLOAD(src_payload, sources, length)); @@ -1926,7 +1926,7 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, * tracking to get the scaling factor. */ if (brw->gen < 6 && is_rect) { - fs_reg dst = fs_reg(GRF, virtual_grf_alloc(coord_components)); + fs_reg dst = fs_reg(GRF, alloc.allocate(coord_components)); fs_reg src = coordinate; coordinate = dst; @@ -1985,7 +1985,7 @@ fs_reg fs_visitor::emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler) { int reg_width = dispatch_width / 8; - fs_reg payload = fs_reg(GRF, virtual_grf_alloc(components * reg_width), + fs_reg payload = fs_reg(GRF, alloc.allocate(components * reg_width), BRW_REGISTER_TYPE_F); fs_reg dest = vgrf(glsl_type::uvec4_type); fs_reg *sources = ralloc_array(mem_ctx, fs_reg, components); @@ -2986,7 +2986,7 @@ fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 4); - sources[0] = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD); + sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); /* Initialize the sample mask in the message header. */ emit(MOV(sources[0], fs_reg(0u))) ->force_writemask_all = true; @@ -3020,7 +3020,7 @@ fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, } int mlen = 1 + (length - 1) * reg_width; - fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(mlen), + fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), BRW_REGISTER_TYPE_UD); emit(LOAD_PAYLOAD(src_payload, sources, length)); @@ -3041,7 +3041,7 @@ fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst, fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 2); - sources[0] = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD); + sources[0] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); /* Initialize the sample mask in the message header. */ emit(MOV(sources[0], fs_reg(0u))) ->force_writemask_all = true; @@ -3060,7 +3060,7 @@ fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst, emit(MOV(sources[1], offset)); int mlen = 1 + reg_width; - fs_reg src_payload = fs_reg(GRF, virtual_grf_alloc(mlen), + fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), BRW_REGISTER_TYPE_UD); fs_inst *inst = emit(LOAD_PAYLOAD(src_payload, sources, 2)); @@ -3280,7 +3280,7 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components) int len = 0; for (unsigned i = 0; i < 4; ++i) { if (colors_enabled & (1 << i)) { - dst[len] = fs_reg(GRF, virtual_grf_alloc(color.width / 8), + dst[len] = fs_reg(GRF, alloc.allocate(color.width / 8), color.type, color.width); inst = emit(MOV(dst[len], offset(color, i))); inst->saturate = key->clamp_fragment_color; @@ -3304,11 +3304,11 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components) */ for (unsigned i = 0; i < 4; ++i) { if (colors_enabled & (1 << i)) { - dst[i] = fs_reg(GRF, virtual_grf_alloc(1), color.type); + dst[i] = fs_reg(GRF, alloc.allocate(1), color.type); inst = emit(MOV(dst[i], half(offset(color, i), 0))); inst->saturate = key->clamp_fragment_color; - dst[i + 4] = fs_reg(GRF, virtual_grf_alloc(1), color.type); + dst[i + 4] = fs_reg(GRF, alloc.allocate(1), color.type); inst = emit(MOV(dst[i + 4], half(offset(color, i), 1))); inst->saturate = key->clamp_fragment_color; inst->force_sechalf = true; @@ -3409,7 +3409,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, length += 2; if (payload.aa_dest_stencil_reg) { - sources[length] = fs_reg(GRF, virtual_grf_alloc(1)); + sources[length] = fs_reg(GRF, alloc.allocate(1)); emit(MOV(sources[length], fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)))); length++; @@ -3423,7 +3423,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, /* Hand over gl_SampleMask. Only lower 16 bits are relevant. Since * it's unsinged single words, one vgrf is always 16-wide. */ - sources[length] = fs_reg(GRF, virtual_grf_alloc(1), + sources[length] = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UW, 16); emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask); length++; @@ -3437,7 +3437,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, length += setup_color_payload(sources + length, this->outputs[0], 0); } else if (color1.file == BAD_FILE) { if (src0_alpha.file != BAD_FILE) { - sources[length] = fs_reg(GRF, virtual_grf_alloc(reg_size), + sources[length] = fs_reg(GRF, alloc.allocate(reg_size), src0_alpha.type, src0_alpha.width); fs_inst *inst = emit(MOV(sources[length], src0_alpha)); inst->saturate = key->clamp_fragment_color; @@ -3486,7 +3486,7 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, /* Send from the GRF */ fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F); load = emit(LOAD_PAYLOAD(payload, sources, length)); - payload.reg = virtual_grf_alloc(load->regs_written); + payload.reg = alloc.allocate(load->regs_written); payload.width = dispatch_width; load->dst = payload; write = emit(FS_OPCODE_FB_WRITE, reg_undef, payload); @@ -3655,7 +3655,7 @@ fs_visitor::emit_urb_writes() * send to terminate the shader. */ if (vue_map->slots_valid == 0) { - fs_reg payload = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD); + fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); fs_inst *inst = emit(MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)))); inst->force_writemask_all = true; @@ -3688,7 +3688,7 @@ fs_visitor::emit_urb_writes() break; } - zero = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD); + zero = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); emit(MOV(zero, fs_reg(0u))); sources[length++] = zero; @@ -3742,7 +3742,7 @@ fs_visitor::emit_urb_writes() * temp register and use that for the payload. */ for (int i = 0; i < 4; i++) { - reg = fs_reg(GRF, virtual_grf_alloc(1), outputs[varying].type); + reg = fs_reg(GRF, alloc.allocate(1), outputs[varying].type); src = offset(this->outputs[varying], i); fs_inst *inst = emit(MOV(reg, src)); inst->saturate = true; @@ -3769,14 +3769,14 @@ fs_visitor::emit_urb_writes() emit_shader_time_end(); fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1); - fs_reg payload = fs_reg(GRF, virtual_grf_alloc(length + 1), + fs_reg payload = fs_reg(GRF, alloc.allocate(length + 1), BRW_REGISTER_TYPE_F); /* We need WE_all on the MOV for the message header (the URB handles) * so do a MOV to a dummy register and set force_writemask_all on the * MOV. LOAD_PAYLOAD will preserve that. */ - fs_reg dummy = fs_reg(GRF, virtual_grf_alloc(1), + fs_reg dummy = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); fs_inst *inst = emit(MOV(dummy, fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)))); @@ -3892,9 +3892,6 @@ fs_visitor::init() this->current_annotation = NULL; this->base_ir = NULL; - this->virtual_grf_sizes = NULL; - this->virtual_grf_count = 0; - this->virtual_grf_array_size = 0; this->virtual_grf_start = NULL; this->virtual_grf_end = NULL; this->live_intervals = NULL; diff --git a/src/mesa/drivers/dri/i965/brw_ir_allocator.h b/src/mesa/drivers/dri/i965/brw_ir_allocator.h new file mode 100644 index 00000000000..b1237ed38e7 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_ir_allocator.h @@ -0,0 +1,87 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010-2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef BRW_IR_ALLOCATOR_H +#define BRW_IR_ALLOCATOR_H + +#include "main/macros.h" + +namespace brw { + /** + * Simple allocator used to keep track of virtual GRFs. + */ + class simple_allocator { + public: + simple_allocator() : + sizes(NULL), offsets(NULL), count(0), total_size(0), capacity(0) + { + } + + ~simple_allocator() + { + free(offsets); + free(sizes); + } + + unsigned + allocate(unsigned size) + { + if (capacity <= count) { + capacity = MAX2(16, capacity * 2); + sizes = (unsigned *)realloc(sizes, capacity * sizeof(unsigned)); + offsets = (unsigned *)realloc(offsets, capacity * sizeof(unsigned)); + } + + sizes[count] = size; + offsets[count] = total_size; + total_size += size; + + return count++; + } + + /** + * Array of sizes for each allocation. The allocation unit is up to the + * back-end, but it's expected to be one scalar value in the FS back-end + * and one vec4 in the VEC4 back-end. + */ + unsigned *sizes; + + /** + * Array of offsets from the start of the VGRF space in allocation + * units. + */ + unsigned *offsets; + + /** Total number of VGRFs allocated. */ + unsigned count; + + /** Cumulative size in allocation units. */ + unsigned total_size; + + private: + unsigned capacity; + }; +} + +#endif diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 40b5715cccd..78666fd222f 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -544,9 +544,9 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be) if (inst->dst.file == GRF) { if (remaining_grf_uses[inst->dst.reg] == 1) - benefit += v->virtual_grf_sizes[inst->dst.reg]; + benefit += v->alloc.sizes[inst->dst.reg]; if (!grf_active[inst->dst.reg]) - benefit -= v->virtual_grf_sizes[inst->dst.reg]; + benefit -= v->alloc.sizes[inst->dst.reg]; } for (int i = 0; i < inst->sources; i++) { @@ -554,9 +554,9 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be) continue; if (remaining_grf_uses[inst->src[i].reg] == 1) - benefit += v->virtual_grf_sizes[inst->src[i].reg]; + benefit += v->alloc.sizes[inst->src[i].reg]; if (!grf_active[inst->src[i].reg]) - benefit -= v->virtual_grf_sizes[inst->src[i].reg]; + benefit -= v->alloc.sizes[inst->src[i].reg]; } return benefit; @@ -1503,7 +1503,7 @@ fs_visitor::schedule_instructions(instruction_scheduler_mode mode) if (mode == SCHEDULE_POST) grf_count = grf_used; else - grf_count = virtual_grf_count; + grf_count = alloc.count; fs_instruction_scheduler sched(this, grf_count, mode); sched.run(cfg); diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 5ad87d6278b..ab3ad60e02b 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -27,6 +27,10 @@ #include "main/compiler.h" #include "glsl/ir.h" +#ifdef __cplusplus +#include "brw_ir_allocator.h" +#endif + #pragma once enum PACKED register_file { @@ -172,6 +176,8 @@ public: gl_shader_stage stage; + brw::simple_allocator alloc; + virtual void dump_instruction(backend_instruction *inst) = 0; virtual void dump_instruction(backend_instruction *inst, FILE *file) = 0; virtual void dump_instructions(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 98fad6c8f87..f2339b399e3 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1209,7 +1209,7 @@ vec4_visitor::opt_register_coalesce() void vec4_visitor::split_virtual_grfs() { - int num_vars = this->virtual_grf_count; + int num_vars = this->alloc.count; int new_virtual_grf[num_vars]; bool split_grf[num_vars]; @@ -1217,7 +1217,7 @@ vec4_visitor::split_virtual_grfs() /* Try to split anything > 0 sized. */ for (int i = 0; i < num_vars; i++) { - split_grf[i] = this->virtual_grf_sizes[i] != 1; + split_grf[i] = this->alloc.sizes[i] != 1; } /* Check that the instructions are compatible with the registers we're trying @@ -1243,13 +1243,13 @@ vec4_visitor::split_virtual_grfs() if (!split_grf[i]) continue; - new_virtual_grf[i] = virtual_grf_alloc(1); - for (int j = 2; j < this->virtual_grf_sizes[i]; j++) { - int reg = virtual_grf_alloc(1); + new_virtual_grf[i] = alloc.allocate(1); + for (unsigned j = 2; j < this->alloc.sizes[i]; j++) { + unsigned reg = alloc.allocate(1); assert(reg == new_virtual_grf[i] + j - 1); (void) reg; } - this->virtual_grf_sizes[i] = 1; + this->alloc.sizes[i] = 1; } foreach_block_and_inst(block, vec4_instruction, inst, cfg) { @@ -1432,7 +1432,7 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) /* Don't print .0; and only VGRFs have reg_offsets and sizes */ if (inst->src[i].reg_offset != 0 && inst->src[i].file == GRF && - virtual_grf_sizes[inst->src[i].reg] != 1) + alloc.sizes[inst->src[i].reg] != 1) fprintf(file, ".%d", inst->src[i].reg_offset); if (inst->src[i].file != IMM) { @@ -1834,9 +1834,9 @@ vec4_visitor::run() if (false) { /* Debug of register spilling: Go spill everything. */ - const int grf_count = virtual_grf_count; - float spill_costs[virtual_grf_count]; - bool no_spill[virtual_grf_count]; + const int grf_count = alloc.count; + float spill_costs[alloc.count]; + bool no_spill[alloc.count]; evaluate_spill_costs(spill_costs, no_spill); for (int i = 0; i < grf_count; i++) { if (no_spill[i]) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 980544d3dbe..6b710c9bc31 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -275,9 +275,6 @@ public: const void *base_ir; const char *current_annotation; - int *virtual_grf_sizes; - int virtual_grf_count; - int virtual_grf_array_size; int first_non_payload_grf; unsigned int max_grf; int *virtual_grf_start; @@ -285,14 +282,6 @@ public: brw::vec4_live_variables *live_intervals; dst_reg userplane[MAX_CLIP_PLANES]; - /** - * This is the size to be used for an array with an element per - * reg_offset - */ - int virtual_grf_reg_count; - /** Per-virtual-grf indices into an array of size virtual_grf_reg_count */ - int *virtual_grf_reg_map; - dst_reg *variable_storage(ir_variable *var); void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); @@ -347,7 +336,6 @@ public: bool run(void); void fail(const char *msg, ...); - int virtual_grf_alloc(int size); void setup_uniform_clipplane_values(); void setup_uniform_values(ir_variable *ir); void setup_builtin_uniform_values(ir_variable *ir); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp index 638d99a4c92..81567d2b295 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp @@ -346,7 +346,7 @@ bool vec4_visitor::opt_copy_propagation(bool do_constant_prop) { bool progress = false; - struct copy_entry entries[virtual_grf_reg_count]; + struct copy_entry entries[alloc.total_size]; memset(&entries, 0, sizeof(entries)); @@ -375,7 +375,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop) inst->src[i].reladdr) continue; - int reg = (virtual_grf_reg_map[inst->src[i].reg] + + int reg = (alloc.offsets[inst->src[i].reg] + inst->src[i].reg_offset); /* Find the regs that each swizzle component came from. @@ -418,7 +418,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop) /* Track available source registers. */ if (inst->dst.file == GRF) { const int reg = - virtual_grf_reg_map[inst->dst.reg] + inst->dst.reg_offset; + alloc.offsets[inst->dst.reg] + inst->dst.reg_offset; /* Update our destination's current channel values. For a direct copy, * the value is the newly propagated source. Otherwise, we don't know @@ -439,7 +439,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop) if (inst->dst.reladdr) memset(&entries, 0, sizeof(entries)); else { - for (int i = 0; i < virtual_grf_reg_count; i++) { + for (unsigned i = 0; i < alloc.total_size; i++) { for (int j = 0; j < 4; j++) { if (is_channel_updated(inst, entries[i].value, j)){ entries[i].value[j] = NULL; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp index ee50419dc9a..5fb8f3166ce 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp @@ -241,7 +241,7 @@ vec4_visitor::opt_cse_local(bblock_t *block) * more -- a sure sign they'll fail operands_match(). */ if (src->file == GRF) { - assert((src->reg * 4 + 3) < (virtual_grf_count * 4)); + assert((unsigned)(src->reg * 4 + 3) < (alloc.count * 4)); int last_reg_use = MAX2(MAX2(virtual_grf_end[src->reg * 4 + 0], virtual_grf_end[src->reg * 4 + 1]), diff --git a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp index 98350691db2..c562b2e6800 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp @@ -96,7 +96,7 @@ vec4_live_variables::setup_def_use() * variable, and thus qualify for being in def[]. */ if (inst->dst.file == GRF && - v->virtual_grf_sizes[inst->dst.reg] == 1 && + v->alloc.sizes[inst->dst.reg] == 1 && !inst->predicate) { for (int c = 0; c < 4; c++) { if (inst->dst.writemask & (1 << c)) { @@ -180,7 +180,7 @@ vec4_live_variables::vec4_live_variables(vec4_visitor *v, cfg_t *cfg) { mem_ctx = ralloc_context(NULL); - num_vars = v->virtual_grf_count * 4; + num_vars = v->alloc.count * 4; block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks); bitset_words = BITSET_WORDS(num_vars); @@ -230,14 +230,14 @@ vec4_visitor::calculate_live_intervals() if (this->live_intervals) return; - int *start = ralloc_array(mem_ctx, int, this->virtual_grf_count * 4); - int *end = ralloc_array(mem_ctx, int, this->virtual_grf_count * 4); + int *start = ralloc_array(mem_ctx, int, this->alloc.count * 4); + int *end = ralloc_array(mem_ctx, int, this->alloc.count * 4); ralloc_free(this->virtual_grf_start); ralloc_free(this->virtual_grf_end); this->virtual_grf_start = start; this->virtual_grf_end = end; - for (int i = 0; i < this->virtual_grf_count * 4; i++) { + for (unsigned i = 0; i < this->alloc.count * 4; i++) { start[i] = MAX_INSTRUCTION; end[i] = -1; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index e8e2185ac1a..b944d454df6 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -45,15 +45,14 @@ assign(unsigned int *reg_hw_locations, backend_reg *reg) bool vec4_visitor::reg_allocate_trivial() { - unsigned int hw_reg_mapping[this->virtual_grf_count]; - bool virtual_grf_used[this->virtual_grf_count]; - int i; + unsigned int hw_reg_mapping[this->alloc.count]; + bool virtual_grf_used[this->alloc.count]; int next; /* Calculate which virtual GRFs are actually in use after whatever * optimization passes have occurred. */ - for (int i = 0; i < this->virtual_grf_count; i++) { + for (unsigned i = 0; i < this->alloc.count; i++) { virtual_grf_used[i] = false; } @@ -61,18 +60,18 @@ vec4_visitor::reg_allocate_trivial() if (inst->dst.file == GRF) virtual_grf_used[inst->dst.reg] = true; - for (int i = 0; i < 3; i++) { + for (unsigned i = 0; i < 3; i++) { if (inst->src[i].file == GRF) virtual_grf_used[inst->src[i].reg] = true; } } hw_reg_mapping[0] = this->first_non_payload_grf; - next = hw_reg_mapping[0] + this->virtual_grf_sizes[0]; - for (i = 1; i < this->virtual_grf_count; i++) { + next = hw_reg_mapping[0] + this->alloc.sizes[0]; + for (unsigned i = 1; i < this->alloc.count; i++) { if (virtual_grf_used[i]) { hw_reg_mapping[i] = next; - next += this->virtual_grf_sizes[i]; + next += this->alloc.sizes[i]; } } prog_data->total_grf = next; @@ -176,7 +175,7 @@ bool vec4_visitor::reg_allocate() { struct intel_screen *screen = brw->intelScreen; - unsigned int hw_reg_mapping[virtual_grf_count]; + unsigned int hw_reg_mapping[alloc.count]; int payload_reg_count = this->first_non_payload_grf; /* Using the trivial allocator can be useful in debugging undefined @@ -187,19 +186,19 @@ vec4_visitor::reg_allocate() calculate_live_intervals(); - int node_count = virtual_grf_count; + int node_count = alloc.count; int first_payload_node = node_count; node_count += payload_reg_count; struct ra_graph *g = ra_alloc_interference_graph(screen->vec4_reg_set.regs, node_count); - for (int i = 0; i < virtual_grf_count; i++) { - int size = this->virtual_grf_sizes[i]; + for (unsigned i = 0; i < alloc.count; i++) { + int size = this->alloc.sizes[i]; assert(size >= 1 && size <= 2 && "Register allocation relies on split_virtual_grfs()."); ra_set_node_class(g, i, screen->vec4_reg_set.classes[size - 1]); - for (int j = 0; j < i; j++) { + for (unsigned j = 0; j < i; j++) { if (virtual_grf_interferes(i, j)) { ra_add_node_interference(g, i, j); } @@ -230,12 +229,12 @@ vec4_visitor::reg_allocate() * numbers. */ prog_data->total_grf = payload_reg_count; - for (int i = 0; i < virtual_grf_count; i++) { + for (unsigned i = 0; i < alloc.count; i++) { int reg = ra_get_node_reg(g, i); hw_reg_mapping[i] = screen->vec4_reg_set.ra_reg_to_grf[reg]; prog_data->total_grf = MAX2(prog_data->total_grf, - hw_reg_mapping[i] + virtual_grf_sizes[i]); + hw_reg_mapping[i] + alloc.sizes[i]); } foreach_block_and_inst(block, vec4_instruction, inst, cfg) { @@ -255,9 +254,9 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) { float loop_scale = 1.0; - for (int i = 0; i < this->virtual_grf_count; i++) { + for (unsigned i = 0; i < this->alloc.count; i++) { spill_costs[i] = 0.0; - no_spill[i] = virtual_grf_sizes[i] != 1; + no_spill[i] = alloc.sizes[i] != 1; } /* Calculate costs for spilling nodes. Call it a cost of 1 per @@ -308,12 +307,12 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) int vec4_visitor::choose_spill_reg(struct ra_graph *g) { - float spill_costs[this->virtual_grf_count]; - bool no_spill[this->virtual_grf_count]; + float spill_costs[this->alloc.count]; + bool no_spill[this->alloc.count]; evaluate_spill_costs(spill_costs, no_spill); - for (int i = 0; i < this->virtual_grf_count; i++) { + for (unsigned i = 0; i < this->alloc.count; i++) { if (!no_spill[i]) ra_set_node_spill_cost(g, i, spill_costs[i]); } @@ -324,7 +323,7 @@ vec4_visitor::choose_spill_reg(struct ra_graph *g) void vec4_visitor::spill_reg(int spill_reg_nr) { - assert(virtual_grf_sizes[spill_reg_nr] == 1); + assert(alloc.sizes[spill_reg_nr] == 1); unsigned int spill_offset = c->last_scratch++; /* Generate spill/unspill instructions for the objects being spilled. */ @@ -332,7 +331,7 @@ vec4_visitor::spill_reg(int spill_reg_nr) for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == GRF && inst->src[i].reg == spill_reg_nr) { src_reg spill_reg = inst->src[i]; - inst->src[i].reg = virtual_grf_alloc(1); + inst->src[i].reg = alloc.allocate(1); dst_reg temp = dst_reg(inst->src[i]); emit_scratch_read(block, inst, temp, spill_reg, spill_offset); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index e6a7ed06020..7d5221386cb 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -616,31 +616,12 @@ type_size(const struct glsl_type *type) return 0; } -int -vec4_visitor::virtual_grf_alloc(int size) -{ - if (virtual_grf_array_size <= virtual_grf_count) { - if (virtual_grf_array_size == 0) - virtual_grf_array_size = 16; - else - virtual_grf_array_size *= 2; - virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, - virtual_grf_array_size); - virtual_grf_reg_map = reralloc(mem_ctx, virtual_grf_reg_map, int, - virtual_grf_array_size); - } - virtual_grf_reg_map[virtual_grf_count] = virtual_grf_reg_count; - virtual_grf_reg_count += size; - virtual_grf_sizes[virtual_grf_count] = size; - return virtual_grf_count++; -} - src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type) { init(); this->file = GRF; - this->reg = v->virtual_grf_alloc(type_size(type)); + this->reg = v->alloc.allocate(type_size(type)); if (type->is_array() || type->is_record()) { this->swizzle = BRW_SWIZZLE_NOOP; @@ -658,7 +639,7 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size) init(); this->file = GRF; - this->reg = v->virtual_grf_alloc(type_size(type) * size); + this->reg = v->alloc.allocate(type_size(type) * size); this->swizzle = BRW_SWIZZLE_NOOP; @@ -670,7 +651,7 @@ dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) init(); this->file = GRF; - this->reg = v->virtual_grf_alloc(type_size(type)); + this->reg = v->alloc.allocate(type_size(type)); if (type->is_array() || type->is_record()) { this->writemask = WRITEMASK_XYZW; @@ -3372,7 +3353,7 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst, void vec4_visitor::move_grf_array_access_to_scratch() { - int scratch_loc[this->virtual_grf_count]; + int scratch_loc[this->alloc.count]; memset(scratch_loc, -1, sizeof(scratch_loc)); /* First, calculate the set of virtual GRFs that need to be punted @@ -3383,7 +3364,7 @@ vec4_visitor::move_grf_array_access_to_scratch() if (inst->dst.file == GRF && inst->dst.reladdr && scratch_loc[inst->dst.reg] == -1) { scratch_loc[inst->dst.reg] = c->last_scratch; - c->last_scratch += this->virtual_grf_sizes[inst->dst.reg]; + c->last_scratch += this->alloc.sizes[inst->dst.reg]; } for (int i = 0 ; i < 3; i++) { @@ -3392,7 +3373,7 @@ vec4_visitor::move_grf_array_access_to_scratch() if (src->file == GRF && src->reladdr && scratch_loc[src->reg] == -1) { scratch_loc[src->reg] = c->last_scratch; - c->last_scratch += this->virtual_grf_sizes[src->reg]; + c->last_scratch += this->alloc.sizes[src->reg]; } } } @@ -3612,11 +3593,6 @@ vec4_visitor::vec4_visitor(struct brw_context *brw, this->virtual_grf_start = NULL; this->virtual_grf_end = NULL; - this->virtual_grf_sizes = NULL; - this->virtual_grf_count = 0; - this->virtual_grf_reg_map = NULL; - this->virtual_grf_reg_count = 0; - this->virtual_grf_array_size = 0; this->live_intervals = NULL; this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; |