diff options
author | Jason Ekstrand <[email protected]> | 2018-05-23 17:54:54 -0700 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2018-06-28 13:19:38 -0700 |
commit | 5e3028d8267817a5b9669bfb736722d9adb156d5 (patch) | |
tree | 342bd5f66750fee43501ce3d2cd7183843e455e5 /src | |
parent | 40fe108e2b655b22b377ee92b4463a6362ba7b54 (diff) |
intel/fs: Emit MOV_DISPATCH_TO_FLAGS once for the centroid workaround
There's no reason for us to emit it once per centroid barycentric mode and
then have a whole optimization pass to clean up the duplicates. Just emit it once, which is what we really want.
Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 39 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_visitor.cpp | 27 |
2 files changed, 16 insertions, 50 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index afbe9108112..8d880969f66 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6211,44 +6211,6 @@ fs_visitor::calculate_register_pressure() } } -/** - * Look for repeated FS_OPCODE_MOV_DISPATCH_TO_FLAGS and drop the later ones. - * - * The needs_unlit_centroid_workaround ends up producing one of these per - * channel of centroid input, so it's good to clean them up. - * - * An assumption here is that nothing ever modifies the dispatched pixels - * value that FS_OPCODE_MOV_DISPATCH_TO_FLAGS reads from, but the hardware - * dictates that anyway. - */ -bool -fs_visitor::opt_drop_redundant_mov_to_flags() -{ - bool flag_mov_found[4] = {false}; - bool progress = false; - - /* Instructions removed by this pass can only be added if this were true */ - if (!devinfo->needs_unlit_centroid_workaround) - return false; - - foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { - if (inst->is_control_flow()) { - memset(flag_mov_found, 0, sizeof(flag_mov_found)); - } else if (inst->opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS) { - if (!flag_mov_found[inst->flag_subreg]) { - flag_mov_found[inst->flag_subreg] = true; - } else { - inst->remove(block); - progress = true; - } - } else if (inst->flags_written()) { - flag_mov_found[inst->flag_subreg] = false; - } - } - - return progress; -} - void fs_visitor::optimize() { @@ -6306,7 +6268,6 @@ fs_visitor::optimize() int iteration = 0; int pass_num = 0; - OPT(opt_drop_redundant_mov_to_flags); OPT(remove_extra_rounding_modes); do { diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index ed1348bec00..d27e9897b45 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -263,23 +263,28 @@ fs_visitor::emit_interpolation_setup_gen6() abld.emit(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); struct brw_wm_prog_data *wm_prog_data = 
brw_wm_prog_data(prog_data); - uint32_t centroid_modes = wm_prog_data->barycentric_interp_modes & - (1 << BRW_BARYCENTRIC_PERSPECTIVE_CENTROID | - 1 << BRW_BARYCENTRIC_NONPERSPECTIVE_CENTROID); for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) { this->delta_xy[i] = fetch_payload_reg( bld, payload.barycentric_coord_reg[i], BRW_REGISTER_TYPE_F, 2); + } - if (devinfo->needs_unlit_centroid_workaround && - (centroid_modes & (1 << i))) { - const fs_reg &pixel_delta_xy = delta_xy[i - 1]; + uint32_t centroid_modes = wm_prog_data->barycentric_interp_modes & + (1 << BRW_BARYCENTRIC_PERSPECTIVE_CENTROID | + 1 << BRW_BARYCENTRIC_NONPERSPECTIVE_CENTROID); - /* Get the pixel/sample mask into f0 so that we know which - * pixels are lit. Then, for each channel that is unlit, - * replace the centroid data with non-centroid data. - */ - bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS); + if (devinfo->needs_unlit_centroid_workaround && centroid_modes) { + /* Get the pixel/sample mask into f0 so that we know which + * pixels are lit. Then, for each channel that is unlit, + * replace the centroid data with non-centroid data. + */ + bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS); + + for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) { + if (!(centroid_modes & (1 << i))) + continue; + + const fs_reg &pixel_delta_xy = delta_xy[i - 1]; for (unsigned q = 0; q < dispatch_width / 8; q++) { for (unsigned c = 0; c < 2; c++) { |