diff options
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 46 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 14 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_ir_fs.h | 6 |
3 files changed, 34 insertions, 32 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 0244f593149..8f1cd61d3b6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -818,7 +818,7 @@ fs_inst::components_read(unsigned i) const } int -fs_inst::regs_read(int arg) const +fs_inst::size_read(int arg) const { switch (opcode) { case FS_OPCODE_FB_WRITE: @@ -837,28 +837,28 @@ fs_inst::regs_read(int arg) const case SHADER_OPCODE_TYPED_SURFACE_WRITE: case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: if (arg == 0) - return mlen; + return mlen * REG_SIZE; break; case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: /* The payload is actually stored in src1 */ if (arg == 1) - return mlen; + return mlen * REG_SIZE; break; case FS_OPCODE_LINTERP: if (arg == 1) - return 1; + return REG_SIZE; break; case SHADER_OPCODE_LOAD_PAYLOAD: if (arg < this->header_size) - return 1; + return REG_SIZE; break; case CS_OPCODE_CS_TERMINATE: case SHADER_OPCODE_BARRIER: - return 1; + return REG_SIZE; case SHADER_OPCODE_MOV_INDIRECT: if (arg == 0) { @@ -867,7 +867,7 @@ fs_inst::regs_read(int arg) const if (src[0].file == UNIFORM) { assert(region_length % 4 == 0); - return region_length / 4; + return region_length; } else if (src[0].file == FIXED_GRF) { /* If the start of the region is not register aligned, then * there's some portion of the register that's technically @@ -884,7 +884,7 @@ fs_inst::regs_read(int arg) const if (src[0].subnr) region_length += src[0].subnr; - return DIV_ROUND_UP(region_length, REG_SIZE); + return region_length; } else { assert(!"Invalid register file"); } @@ -893,22 +893,20 @@ fs_inst::regs_read(int arg) const default: if (is_tex() && arg == 0 && src[0].file == VGRF) - return mlen; + return mlen * REG_SIZE; break; } switch (src[arg].file) { case UNIFORM: case IMM: - return 1; + return 4; case BAD_FILE: case ARF: case FIXED_GRF: case VGRF: case ATTR: - return DIV_ROUND_UP(components_read(arg) * - src[arg].component_size(exec_size), - REG_SIZE); + return components_read(arg) * src[arg].component_size(exec_size); case MRF: unreachable("MRF registers are not allowed as sources"); } @@ -2547,7 +2545,7 @@ fs_visitor::opt_sampler_eot() for (unsigned i = 0; i < FB_WRITE_LOGICAL_NUM_SRCS; i++) { if (i == FB_WRITE_LOGICAL_SRC_COLOR0) { if (!fb_write->src[i].equals(tex_inst->dst) || - fb_write->regs_read(i) * REG_SIZE != tex_inst->size_written) + fb_write->size_read(i) != tex_inst->size_written) return false; } else if (i != FB_WRITE_LOGICAL_SRC_COMPONENTS) { if (fb_write->src[i].file != BAD_FILE) @@ -2730,7 +2728,7 @@ fs_visitor::compute_to_mrf() foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (regions_overlap(scan_inst->dst, scan_inst->size_written, - inst->src[0], inst->regs_read(0) * REG_SIZE)) { + inst->src[0], inst->size_read(0))) { /* Found the last thing to write our reg we want to turn * into a compute-to-MRF. */ @@ -2749,7 +2747,7 @@ fs_visitor::compute_to_mrf() */ if (scan_inst->dst.offset / REG_SIZE < inst->src[0].offset / REG_SIZE || scan_inst->dst.offset / REG_SIZE + DIV_ROUND_UP(scan_inst->size_written, REG_SIZE) > - inst->src[0].offset / REG_SIZE + inst->regs_read(0)) + inst->src[0].offset / REG_SIZE + DIV_ROUND_UP(inst->size_read(0), REG_SIZE)) break; /* SEND instructions can't have MRF as a destination. */ @@ -2785,8 +2783,8 @@ fs_visitor::compute_to_mrf() */ bool interfered = false; for (int i = 0; i < scan_inst->sources; i++) { - if (regions_overlap(scan_inst->src[i], scan_inst->regs_read(i) * REG_SIZE, - inst->src[0], inst->regs_read(0) * REG_SIZE)) { + if (regions_overlap(scan_inst->src[i], scan_inst->size_read(i), + inst->src[0], inst->size_read(0))) { interfered = true; } } @@ -2823,7 +2821,7 @@ fs_visitor::compute_to_mrf() foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { if (regions_overlap(scan_inst->dst, scan_inst->size_written, - inst->src[0], inst->regs_read(0) * REG_SIZE)) { + inst->src[0], inst->size_read(0))) { /* Clear the bits for any registers this instruction overwrites. */ regs_left &= ~mask_relative_to( inst->src[0], scan_inst->dst, DIV_ROUND_UP(scan_inst->size_written, @@ -3027,7 +3025,7 @@ fs_visitor::remove_duplicate_mrf_writes() if (last_mrf_move[i] && regions_overlap(inst->dst, inst->size_written, last_mrf_move[i]->src[0], - last_mrf_move[i]->regs_read(0) * REG_SIZE)) { + last_mrf_move[i]->size_read(0))) { last_mrf_move[i] = NULL; } } @@ -4607,7 +4605,7 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo, unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE); for (unsigned i = 0; i < inst->sources; i++) - reg_count = MAX2(reg_count, (unsigned)inst->regs_read(i)); + reg_count = MAX2(reg_count, DIV_ROUND_UP(inst->size_read(i), REG_SIZE)); /* Calculate the maximum execution size of the instruction based on the * factor by which it goes over the hardware limit of 2 GRFs. @@ -4632,7 +4630,7 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo, if (devinfo->gen < 8) { for (unsigned i = 0; i < inst->sources; i++) { if (DIV_ROUND_UP(inst->size_written, REG_SIZE) == 2 && - inst->regs_read(i) != 0 && inst->regs_read(i) != 2 && + inst->size_read(i) != 0 && DIV_ROUND_UP(inst->size_read(i), REG_SIZE) != 2 && !is_uniform(inst->src[i]) && !(type_sz(inst->dst.type) == 4 && inst->dst.stride == 1 && type_sz(inst->src[i].type) == 2 && inst->src[i].stride == 1)) { @@ -5114,7 +5112,7 @@ needs_dst_copy(const fs_builder &lbld, const fs_inst *inst) * the data read from the same source by other lowered instructions. */ if (regions_overlap(inst->dst, inst->size_written, - inst->src[i], inst->regs_read(i) * REG_SIZE) && + inst->src[i], inst->size_read(i)) && !inst->dst.equals(inst->src[i])) return true; } @@ -5371,7 +5369,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) switch (inst->src[i].file) { case VGRF: fprintf(file, "vgrf%d", inst->src[i].nr); - if (alloc.sizes[inst->src[i].nr] != (unsigned)inst->regs_read(i) || + if (alloc.sizes[inst->src[i].nr] * REG_SIZE != inst->size_read(i) || inst->src[i].offset % REG_SIZE != 0) fprintf(file, "+%d.%d", inst->src[i].offset / REG_SIZE, inst->src[i].offset % REG_SIZE); diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 0e239d28d44..f8238aa6d77 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -44,7 +44,7 @@ struct acp_entry : public exec_node { fs_reg dst; fs_reg src; uint8_t size_written; - uint8_t regs_read; + uint8_t size_read; enum opcode opcode; bool saturate; }; @@ -367,7 +367,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) /* Bail if inst is reading a range that isn't contained in the range * that entry is writing. */ - if (!region_contained_in(inst->src[arg], inst->regs_read(arg), + if (!region_contained_in(inst->src[arg], DIV_ROUND_UP(inst->size_read(arg), + REG_SIZE), entry->dst, DIV_ROUND_UP(entry->size_written, REG_SIZE))) return false; @@ -524,7 +525,8 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) /* Bail if inst is reading a range that isn't contained in the range * that entry is writing. */ - if (!region_contained_in(inst->src[i], inst->regs_read(i), + if (!region_contained_in(inst->src[i], DIV_ROUND_UP(inst->size_read(i), + REG_SIZE), entry->dst, DIV_ROUND_UP(entry->size_written, REG_SIZE))) continue; @@ -785,7 +787,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, /* Make sure we kill the entry if this instruction overwrites * _any_ of the registers that it reads */ - if (regions_overlap(entry->src, entry->regs_read * REG_SIZE, + if (regions_overlap(entry->src, entry->size_read, inst->dst, inst->size_written)) entry->remove(); } @@ -800,7 +802,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, entry->dst = inst->dst; entry->src = inst->src[0]; entry->size_written = inst->size_written; - entry->regs_read = inst->regs_read(0); + entry->size_read = inst->size_read(0); entry->opcode = inst->opcode; entry->saturate = inst->saturate; acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry); @@ -818,7 +820,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, entry->dst.offset += offset * REG_SIZE; entry->src = inst->src[i]; entry->size_written = size_written; - entry->regs_read = inst->regs_read(i); + entry->size_read = inst->size_read(i); entry->opcode = inst->opcode; if (!entry->dst.equals(inst->src[i])) { acp[entry->dst.nr % ACP_HASH_SIZE].push_tail(entry); diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index cea81e4646a..2e5c8e54d22 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -324,7 +324,7 @@ public: bool is_partial_write() const; bool is_copy_payload(const brw::simple_allocator &grf_alloc) const; unsigned components_read(unsigned i) const; - int regs_read(int arg) const; + int size_read(int arg) const; bool can_do_source_mods(const struct gen_device_info *devinfo); bool can_change_types() const; bool has_side_effects() const; @@ -435,7 +435,9 @@ inline unsigned regs_read(const fs_inst *inst, unsigned i) { /* XXX - Take into account register-misaligned offsets correctly. */ - return inst->regs_read(i); + const unsigned reg_size = + inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 4 : REG_SIZE; + return DIV_ROUND_UP(inst->size_read(i), reg_size); } #endif |