diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 28 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 8 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp | 8 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 26 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_validate.cpp | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_ir_fs.h | 26 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 30 |
9 files changed, 83 insertions, 57 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 04f04572d83..802aa9f76f4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1772,13 +1772,13 @@ fs_visitor::split_virtual_grfs() foreach_block_and_inst(block, fs_inst, inst, cfg) { if (inst->dst.file == VGRF) { int reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE; - for (int j = 1; j < inst->regs_written; j++) + for (unsigned j = 1; j < regs_written(inst); j++) split_points[reg + j] = false; } for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) { int reg = vgrf_to_reg[inst->src[i].nr] + inst->src[i].offset / REG_SIZE; - for (int j = 1; j < inst->regs_read(i); j++) + for (unsigned j = 1; j < regs_read(inst, i); j++) split_points[reg + j] = false; } } @@ -2611,7 +2611,7 @@ fs_visitor::opt_register_renaming() if (remap[dst] == -1) { remap[dst] = dst; } else { - remap[dst] = alloc.allocate(inst->regs_written); + remap[dst] = alloc.allocate(regs_written(inst)); inst->dst.nr = remap[dst]; progress = true; } @@ -2727,7 +2727,7 @@ fs_visitor::compute_to_mrf() * regs_left bitset keeps track of the registers we haven't yet found a * generating instruction for. */ - unsigned regs_left = (1 << inst->regs_read(0)) - 1; + unsigned regs_left = (1 << regs_read(inst, 0)) - 1; foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE, @@ -2819,7 +2819,7 @@ fs_visitor::compute_to_mrf() /* Found all generating instructions of our MRF's source value, so it * should be safe to rewrite them to point to the MRF directly. */ - regs_left = (1 << inst->regs_read(0)) - 1; + regs_left = (1 << regs_read(inst, 0)) - 1; foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE, @@ -3086,7 +3086,7 @@ void fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block, fs_inst *inst) { - int write_len = inst->regs_written; + int write_len = regs_written(inst); int first_write_grf = inst->dst.nr; bool needs_dep[BRW_MAX_MRF(devinfo->gen)]; assert(write_len < (int)sizeof(needs_dep) - 1); @@ -3119,7 +3119,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block, * dependency has more latency than a MOV. */ if (scan_inst->dst.file == VGRF) { - for (int i = 0; i < scan_inst->regs_written; i++) { + for (unsigned i = 0; i < regs_written(scan_inst); i++) { int reg = scan_inst->dst.nr + i; if (reg >= first_write_grf && @@ -3157,7 +3157,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block, void fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_inst *inst) { - int write_len = inst->regs_written; + int write_len = regs_written(inst); int first_write_grf = inst->dst.nr; bool needs_dep[BRW_MAX_MRF(devinfo->gen)]; assert(write_len < (int)sizeof(needs_dep) - 1); @@ -3800,7 +3800,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, /* Send from the GRF */ fs_reg payload = fs_reg(VGRF, -1, BRW_REGISTER_TYPE_F); load = bld.LOAD_PAYLOAD(payload, sources, length, payload_header_size); - payload.nr = bld.shader->alloc.allocate(load->regs_written); + payload.nr = bld.shader->alloc.allocate(regs_written(load)); load->dst = payload; inst->src[0] = payload; @@ -3821,7 +3821,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, } inst->opcode = FS_OPCODE_FB_WRITE; - inst->mlen = load->regs_written; + inst->mlen = regs_written(load); inst->header_size = header_size; } @@ -4069,7 +4069,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, unsigned grad_components) { const gen_device_info *devinfo = bld.shader->devinfo; - int reg_width = bld.dispatch_width() / 8; + unsigned reg_width = bld.dispatch_width() / 8; unsigned header_size = 0, length = 0; fs_reg sources[MAX_SAMPLER_MESSAGE_SIZE]; for (unsigned i = 0; i < ARRAY_SIZE(sources); i++) @@ -4097,9 +4097,9 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op, * and we have an explicit header, we need to set up the sampler * writemask. It's reversed from normal: 1 means "don't write". */ - if (!inst->eot && inst->regs_written != 4 * reg_width) { - assert((inst->regs_written % reg_width) == 0); - unsigned mask = ~((1 << (inst->regs_written / reg_width)) - 1) & 0xf; + if (!inst->eot && regs_written(inst) != 4 * reg_width) { + assert(regs_written(inst) % reg_width == 0); + unsigned mask = ~((1 << (regs_written(inst) / reg_width)) - 1) & 0xf; inst->offset |= mask << 12; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 0c65c5b94b7..4744142a4b6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -199,8 +199,8 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate) static void create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) { - int written = inst->regs_written; - int dst_width = + unsigned written = regs_written(inst); + unsigned dst_width = DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE); fs_inst *copy; @@ -234,7 +234,7 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) copy->force_writemask_all = inst->force_writemask_all; copy->src[0].negate = negate; } - assert(copy->regs_written == written); + assert(regs_written(copy) == written); } bool @@ -284,7 +284,7 @@ fs_visitor::opt_cse_local(bblock_t *block) if (no_existing_temp && !entry->generator->dst.is_null()) { const fs_builder ibld = fs_builder(this, block, entry->generator) .at(block, entry->generator->next); - int written = entry->generator->regs_written; + int written = regs_written(entry->generator); entry->tmp = fs_reg(VGRF, alloc.allocate(written), entry->generator->dst.type); diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp index 45f5c5ebb9b..4558bd42a24 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp @@ -55,12 +55,12 @@ fs_visitor::dead_code_eliminate() if (inst->dst.file == VGRF && !inst->has_side_effects()) { bool result_live = false; - if (inst->regs_written == 1) { + if (regs_written(inst) == 1) { int var = live_intervals->var_from_reg(inst->dst); result_live = BITSET_TEST(live, var); } else { int var = live_intervals->var_from_reg(inst->dst); - for (int i = 0; i < inst->regs_written; i++) { + for (unsigned i = 0; i < regs_written(inst); i++) { result_live = result_live || BITSET_TEST(live, var + i); } } @@ -96,7 +96,7 @@ fs_visitor::dead_code_eliminate() if (inst->dst.file == VGRF) { if (!inst->is_partial_write()) { int var = live_intervals->var_from_reg(inst->dst); - for (int i = 0; i < inst->regs_written; i++) { + for (unsigned i = 0; i < regs_written(inst); i++) { BITSET_CLEAR(live, var + i); } } @@ -114,7 +114,7 @@ fs_visitor::dead_code_eliminate() if (inst->src[i].file == VGRF) { int var = live_intervals->var_from_reg(inst->src[i]); - for (int j = 0; j < inst->regs_read(i); j++) { + for (unsigned j = 0; j < regs_read(inst, i); j++) { BITSET_SET(live, var + j); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp index 02dc7774427..a6c98e33218 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp @@ -118,7 +118,7 @@ fs_live_variables::setup_def_use() if (reg.file != VGRF) continue; - for (int j = 0; j < inst->regs_read(i); j++) { + for (unsigned j = 0; j < regs_read(inst, i); j++) { setup_one_read(bd, inst, ip, reg); reg.offset += REG_SIZE; } @@ -129,7 +129,7 @@ fs_live_variables::setup_def_use() /* Set def[] for this instruction */ if (inst->dst.file == VGRF) { fs_reg reg = inst->dst; - for (int j = 0; j < inst->regs_written; j++) { + for (unsigned j = 0; j < regs_written(inst); j++) { setup_one_write(bd, inst, ip, reg); reg.offset += REG_SIZE; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 82adaa35166..572735a379a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -362,9 +362,9 @@ void fs_visitor::calculate_payload_ranges(int payload_node_count, if (node_nr >= payload_node_count) continue; - for (int j = 0; j < inst->regs_read(i); j++) { + for (unsigned j = 0; j < regs_read(inst, i); j++) { payload_last_use_ip[node_nr + j] = use_ip; - assert(node_nr + j < payload_node_count); + assert(node_nr + j < unsigned(payload_node_count)); } } } @@ -903,10 +903,10 @@ fs_visitor::spill_reg(int spill_reg) for (unsigned int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF && inst->src[i].nr == spill_reg) { - int regs_read = inst->regs_read(i); + int count = regs_read(inst, i); int subset_spill_offset = spill_offset + ROUND_DOWN_TO(inst->src[i].offset, REG_SIZE); - fs_reg unspill_dst(VGRF, alloc.allocate(regs_read)); + fs_reg unspill_dst(VGRF, alloc.allocate(count)); inst->src[i].nr = unspill_dst.nr; inst->src[i].offset %= REG_SIZE; @@ -916,7 +916,7 @@ fs_visitor::spill_reg(int spill_reg) * hardware) up to the maximum supported block size. */ const unsigned width = - MIN2(32, 1u << (ffs(MAX2(1, regs_read) * 8) - 1)); + MIN2(32, 1u << (ffs(MAX2(1, count) * 8) - 1)); /* Set exec_all() on unspill messages under the (rather * pessimistic) assumption that there is no one-to-one @@ -926,7 +926,7 @@ fs_visitor::spill_reg(int spill_reg) * unspill destination is a block-local temporary. */ emit_unspill(ibld.exec_all().group(width, 0), - unspill_dst, subset_spill_offset, regs_read); + unspill_dst, subset_spill_offset, count); } } @@ -934,7 +934,7 @@ fs_visitor::spill_reg(int spill_reg) inst->dst.nr == spill_reg) { int subset_spill_offset = spill_offset + ROUND_DOWN_TO(inst->dst.offset, REG_SIZE); - fs_reg spill_src(VGRF, alloc.allocate(inst->regs_written)); + fs_reg spill_src(VGRF, alloc.allocate(regs_written(inst))); inst->dst.nr = spill_src.nr; inst->dst.offset %= REG_SIZE; @@ -971,19 +971,19 @@ fs_visitor::spill_reg(int spill_reg) const fs_builder ubld = ibld.exec_all(!per_channel).group(width, 0); /* If our write is going to affect just part of the - * inst->regs_written(), then we need to unspill the destination - * since we write back out all of the regs_written(). If the - * original instruction had force_writemask_all set and is not a - * partial write, there should be no need for the unspill since the + * regs_written(inst), then we need to unspill the destination since + * we write back out all of the regs_written(). If the original + * instruction had force_writemask_all set and is not a partial + * write, there should be no need for the unspill since the * instruction will be overwriting the whole destination in any case. */ if (inst->is_partial_write() || (!inst->force_writemask_all && !per_channel)) emit_unspill(ubld, spill_src, subset_spill_offset, - inst->regs_written); + regs_written(inst)); emit_spill(ubld.at(block, inst->next), spill_src, - subset_spill_offset, inst->regs_written); + subset_spill_offset, regs_written(inst)); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp index 651c136dfa7..3dd0fbfc1c1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp @@ -190,7 +190,7 @@ fs_visitor::register_coalesce() dst_reg_offset[i] = i; } mov[0] = inst; - channels_remaining -= inst->regs_written; + channels_remaining -= regs_written(inst); } else { const int offset = inst->src[0].offset / REG_SIZE; if (mov[offset]) { @@ -207,7 +207,7 @@ fs_visitor::register_coalesce() if (inst->regs_written > 1) dst_reg_offset[offset + 1] = inst->dst.offset / REG_SIZE + 1; mov[offset] = inst; - channels_remaining -= inst->regs_written; + channels_remaining -= regs_written(inst); } if (channels_remaining) diff --git a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp index 10ad7c37b24..676942c19c0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_validate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_validate.cpp @@ -43,14 +43,14 @@ fs_visitor::validate() { foreach_block_and_inst (block, fs_inst, inst, cfg) { if (inst->dst.file == VGRF) { - fsv_assert(inst->dst.offset / REG_SIZE + inst->regs_written <= + fsv_assert(inst->dst.offset / REG_SIZE + regs_written(inst) <= alloc.sizes[inst->dst.nr]); } for (unsigned i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) { - fsv_assert(inst->src[i].offset / REG_SIZE + inst->regs_read(i) <= - (int)alloc.sizes[inst->src[i].nr]); + fsv_assert(inst->src[i].offset / REG_SIZE + regs_read(inst, i) <= + alloc.sizes[inst->src[i].nr]); } } } diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index 19ef242d166..de08a691055 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -411,4 +411,30 @@ set_saturate(bool saturate, fs_inst *inst) return inst; } +/** + * Return the number of dataflow registers written by the instruction (either + * fully or partially) counted from 'floor(reg_offset(inst->dst) / + * register_size)'. The somewhat arbitrary register size unit is 4B for the + * UNIFORM and IMM files and 32B for all other files. + */ +inline unsigned +regs_written(const fs_inst *inst) +{ + /* XXX - Take into account register-misaligned offsets correctly. */ + return inst->regs_written; +} + +/** + * Return the number of dataflow registers read by the instruction (either + * fully or partially) counted from 'floor(reg_offset(inst->src[i]) / + * register_size)'. The somewhat arbitrary register size unit is 4B for the + * UNIFORM and IMM files and 32B for all other files. + */ +inline unsigned +regs_read(const fs_inst *inst, unsigned i) +{ + /* XXX - Take into account register-misaligned offsets correctly. */ + return inst->regs_read(i); +} + #endif diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index dde75547590..0d3a07cad5b 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -620,7 +620,7 @@ fs_instruction_scheduler::count_reads_remaining(backend_instruction *be) if (inst->src[i].nr >= hw_reg_count) continue; - for (int j = 0; j < inst->regs_read(i); j++) + for (unsigned j = 0; j < regs_read(inst, i); j++) hw_reads_remaining[inst->src[i].nr + j]++; } } @@ -702,7 +702,7 @@ fs_instruction_scheduler::update_register_pressure(backend_instruction *be) reads_remaining[inst->src[i].nr]--; } else if (inst->src[i].file == FIXED_GRF && inst->src[i].nr < hw_reg_count) { - for (int off = 0; off < inst->regs_read(i); off++) + for (unsigned off = 0; off < regs_read(inst, i); off++) hw_reads_remaining[inst->src[i].nr + off]--; } } @@ -731,7 +731,7 @@ fs_instruction_scheduler::get_register_pressure_benefit(backend_instruction *be) if (inst->src[i].file == FIXED_GRF && inst->src[i].nr < hw_reg_count) { - for (int off = 0; off < inst->regs_read(i); off++) { + for (unsigned off = 0; off < regs_read(inst, i); off++) { int reg = inst->src[i].nr + off; if (!BITSET_TEST(hw_liveout[block_idx], reg) && hw_reads_remaining[reg] == 1) { @@ -1004,17 +1004,17 @@ fs_instruction_scheduler::calculate_deps() for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_read(i); r++) + for (unsigned r = 0; r < regs_read(inst, i); r++) add_dep(last_grf_write[inst->src[i].nr + r], n); } else { - for (int r = 0; r < inst->regs_read(i); r++) { + for (unsigned r = 0; r < regs_read(inst, i); r++) { add_dep(last_grf_write[inst->src[i].nr * 16 + inst->src[i].offset / REG_SIZE + r], n); } } } else if (inst->src[i].file == FIXED_GRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_read(i); r++) + for (unsigned r = 0; r < regs_read(inst, i); r++) add_dep(last_grf_write[inst->src[i].nr + r], n); } else { add_dep(last_fixed_grf_write, n); @@ -1052,12 +1052,12 @@ fs_instruction_scheduler::calculate_deps() /* write-after-write deps. */ if (inst->dst.file == VGRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_written; r++) { + for (unsigned r = 0; r < regs_written(inst); r++) { add_dep(last_grf_write[inst->dst.nr + r], n); last_grf_write[inst->dst.nr + r] = n; } } else { - for (int r = 0; r < inst->regs_written; r++) { + for (unsigned r = 0; r < regs_written(inst); r++) { add_dep(last_grf_write[inst->dst.nr * 16 + inst->dst.offset / REG_SIZE + r], n); last_grf_write[inst->dst.nr * 16 + @@ -1079,7 +1079,7 @@ fs_instruction_scheduler::calculate_deps() } } else if (inst->dst.file == FIXED_GRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_written; r++) + for (unsigned r = 0; r < regs_written(inst); r++) last_grf_write[inst->dst.nr + r] = n; } else { last_fixed_grf_write = n; @@ -1130,17 +1130,17 @@ fs_instruction_scheduler::calculate_deps() for (int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_read(i); r++) + for (unsigned r = 0; r < regs_read(inst, i); r++) add_dep(n, last_grf_write[inst->src[i].nr + r], 0); } else { - for (int r = 0; r < inst->regs_read(i); r++) { + for (unsigned r = 0; r < regs_read(inst, i); r++) { add_dep(n, last_grf_write[inst->src[i].nr * 16 + inst->src[i].offset / REG_SIZE + r], 0); } } } else if (inst->src[i].file == FIXED_GRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_read(i); r++) + for (unsigned r = 0; r < regs_read(inst, i); r++) add_dep(n, last_grf_write[inst->src[i].nr + r], 0); } else { add_dep(n, last_fixed_grf_write, 0); @@ -1180,10 +1180,10 @@ fs_instruction_scheduler::calculate_deps() */ if (inst->dst.file == VGRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_written; r++) + for (unsigned r = 0; r < regs_written(inst); r++) last_grf_write[inst->dst.nr + r] = n; } else { - for (int r = 0; r < inst->regs_written; r++) { + for (unsigned r = 0; r < regs_written(inst); r++) { last_grf_write[inst->dst.nr * 16 + inst->dst.offset / REG_SIZE + r] = n; } @@ -1203,7 +1203,7 @@ fs_instruction_scheduler::calculate_deps() } } else if (inst->dst.file == FIXED_GRF) { if (post_reg_alloc) { - for (int r = 0; r < inst->regs_written; r++) + for (unsigned r = 0; r < regs_written(inst); r++) last_grf_write[inst->dst.nr + r] = n; } else { last_fixed_grf_write = n; |