summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Jason Ekstrand <[email protected]> 2018-05-23 17:54:54 -0700
committer: Jason Ekstrand <[email protected]> 2018-06-28 13:19:38 -0700
commit: 5e3028d8267817a5b9669bfb736722d9adb156d5 (patch)
tree: 342bd5f66750fee43501ce3d2cd7183843e455e5 /src
parent: 40fe108e2b655b22b377ee92b4463a6362ba7b54 (diff)
intel/fs: Emit MOV_DISPATCH_TO_FLAGS once for the centroid workaround
There's no reason for us to emit it a pile of times and then have a whole pass to clean it up. Just emit it once like we really want.

Reviewed-by: Matt Turner <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- src/intel/compiler/brw_fs.cpp 39
-rw-r--r-- src/intel/compiler/brw_fs_visitor.cpp 27
2 files changed, 16 insertions, 50 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index afbe9108112..8d880969f66 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -6211,44 +6211,6 @@ fs_visitor::calculate_register_pressure()
}
}
-/**
- * Look for repeated FS_OPCODE_MOV_DISPATCH_TO_FLAGS and drop the later ones.
- *
- * The needs_unlit_centroid_workaround ends up producing one of these per
- * channel of centroid input, so it's good to clean them up.
- *
- * An assumption here is that nothing ever modifies the dispatched pixels
- * value that FS_OPCODE_MOV_DISPATCH_TO_FLAGS reads from, but the hardware
- * dictates that anyway.
- */
-bool
-fs_visitor::opt_drop_redundant_mov_to_flags()
-{
- bool flag_mov_found[4] = {false};
- bool progress = false;
-
- /* Instructions removed by this pass can only be added if this were true */
- if (!devinfo->needs_unlit_centroid_workaround)
- return false;
-
- foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
- if (inst->is_control_flow()) {
- memset(flag_mov_found, 0, sizeof(flag_mov_found));
- } else if (inst->opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS) {
- if (!flag_mov_found[inst->flag_subreg]) {
- flag_mov_found[inst->flag_subreg] = true;
- } else {
- inst->remove(block);
- progress = true;
- }
- } else if (inst->flags_written()) {
- flag_mov_found[inst->flag_subreg] = false;
- }
- }
-
- return progress;
-}
-
void
fs_visitor::optimize()
{
@@ -6306,7 +6268,6 @@ fs_visitor::optimize()
int iteration = 0;
int pass_num = 0;
- OPT(opt_drop_redundant_mov_to_flags);
OPT(remove_extra_rounding_modes);
do {
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index ed1348bec00..d27e9897b45 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -263,23 +263,28 @@ fs_visitor::emit_interpolation_setup_gen6()
abld.emit(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(prog_data);
- uint32_t centroid_modes = wm_prog_data->barycentric_interp_modes &
- (1 << BRW_BARYCENTRIC_PERSPECTIVE_CENTROID |
- 1 << BRW_BARYCENTRIC_NONPERSPECTIVE_CENTROID);
for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
this->delta_xy[i] = fetch_payload_reg(
bld, payload.barycentric_coord_reg[i], BRW_REGISTER_TYPE_F, 2);
+ }
- if (devinfo->needs_unlit_centroid_workaround &&
- (centroid_modes & (1 << i))) {
- const fs_reg &pixel_delta_xy = delta_xy[i - 1];
+ uint32_t centroid_modes = wm_prog_data->barycentric_interp_modes &
+ (1 << BRW_BARYCENTRIC_PERSPECTIVE_CENTROID |
+ 1 << BRW_BARYCENTRIC_NONPERSPECTIVE_CENTROID);
- /* Get the pixel/sample mask into f0 so that we know which
- * pixels are lit. Then, for each channel that is unlit,
- * replace the centroid data with non-centroid data.
- */
- bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
+ if (devinfo->needs_unlit_centroid_workaround && centroid_modes) {
+ /* Get the pixel/sample mask into f0 so that we know which
+ * pixels are lit. Then, for each channel that is unlit,
+ * replace the centroid data with non-centroid data.
+ */
+ bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
+
+ for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
+ if (!(centroid_modes & (1 << i)))
+ continue;
+
+ const fs_reg &pixel_delta_xy = delta_xy[i - 1];
for (unsigned q = 0; q < dispatch_width / 8; q++) {
for (unsigned c = 0; c < 2; c++) {