diff options
Diffstat (limited to 'src/intel')
-rw-r--r-- | src/intel/compiler/brw_fs_register_coalesce.cpp | 50 |
1 files changed, 43 insertions, 7 deletions
diff --git a/src/intel/compiler/brw_fs_register_coalesce.cpp b/src/intel/compiler/brw_fs_register_coalesce.cpp index e1964863519..5d24240c393 100644 --- a/src/intel/compiler/brw_fs_register_coalesce.cpp +++ b/src/intel/compiler/brw_fs_register_coalesce.cpp @@ -97,8 +97,8 @@ is_coalesce_candidate(const fs_visitor *v, const fs_inst *inst) } static bool -can_coalesce_vars(const fs_live_variables &live, - const cfg_t *cfg, const fs_inst *inst, +can_coalesce_vars(const fs_live_variables &live, const cfg_t *cfg, + const bblock_t *block, const fs_inst *inst, int dst_var, int src_var) { if (!live.vars_interfere(src_var, dst_var)) @@ -126,6 +126,8 @@ can_coalesce_vars(const fs_live_variables &live, int scan_ip = scan_block->start_ip - 1; + bool seen_src_write = false; + bool seen_copy = false; foreach_inst_in_block(fs_inst, scan_inst, scan_block) { scan_ip++; @@ -134,17 +136,51 @@ can_coalesce_vars(const fs_live_variables &live, continue; /* Ignore the copying instruction itself */ - if (scan_inst == inst) + if (scan_inst == inst) { + seen_copy = true; continue; + } if (scan_ip > end_ip) return true; /* registers do not interfere */ + if (seen_src_write && !seen_copy) { + /* In order to satisfy the guarantee of register coalescing, we + * must ensure that the two registers always have the same value + * during the intersection of their live ranges. One way to do + * this is to simply ensure that neither is ever written apart + * from the one copy which syncs up the two registers. However, + * this can be overly conservative and only works in the case + * where the destination live range is entirely contained in the + * source live range. + * + * To handle the other case where the source is contained in the + * destination, we allow writes to the source register as long as + * they happen before the copy, in the same block as the copy, and + * the destination is never read between first such write and the + * copy. This effectively moves the write from the copy up. + */ + for (int j = 0; j < scan_inst->sources; j++) { + if (regions_overlap(scan_inst->src[j], scan_inst->size_read(j), + inst->dst, inst->size_written)) + return false; /* registers interfere */ + } + } + + /* The MOV being coalesced had better be the only instruction which + * writes to the coalesce destination in the intersection. + */ if (regions_overlap(scan_inst->dst, scan_inst->size_written, - inst->dst, inst->size_written) || - regions_overlap(scan_inst->dst, scan_inst->size_written, - inst->src[0], inst->size_read(0))) + inst->dst, inst->size_written)) return false; /* registers interfere */ + + /* See the big comment above */ + if (regions_overlap(scan_inst->dst, scan_inst->size_written, + inst->src[0], inst->size_read(0))) { + if (seen_copy || scan_block != block) + return false; + seen_src_write = true; + } } } @@ -228,7 +264,7 @@ fs_visitor::register_coalesce() dst_var[i] = live.var_from_vgrf[dst_reg] + dst_reg_offset[i]; src_var[i] = live.var_from_vgrf[src_reg] + i; - if (!can_coalesce_vars(live, cfg, inst, dst_var[i], src_var[i])) { + if (!can_coalesce_vars(live, cfg, block, inst, dst_var[i], src_var[i])) { can_coalesce = false; src_reg = ~0u; break; |