summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKenneth Graunke <[email protected]>2016-08-01 20:59:08 -0700
committerKenneth Graunke <[email protected]>2016-08-05 01:43:52 -0700
commit875341c69b99dea7942a68c9060aa31a459e93fc (patch)
tree26b757501816bb14a1571d93a0cf35b96a0d5af0
parentb521083ffb351b7fe2521a47731a46cdbb614117 (diff)
i965: Rework the unlit centroid workaround.
Previously, for every input, we moved the dispatch mask to the flag
register, then emitted two predicated PLN instructions, one with
centroid barycentric coordinates (for normal pixels), and one with
pixel barycentric coordinates (for unlit helper pixels).

Instead, we can simply emit a set of predicated MOVs at the top of the
program which copy the pixel barycentric coordinates over the centroid
ones for unlit helper pixel channels. Then, we can just use normal PLNs.

On Sandybridge:

total instructions in shared programs: 7538470 -> 7534500 (-0.05%)
instructions in affected programs: 101268 -> 97298 (-3.92%)
helped: 705
HURT: 9 (all of which are SIMD16 programs)

Signed-off-by: Kenneth Graunke <[email protected]>
Reviewed-by: Jason Ekstrand <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp26
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_visitor.cpp32
2 files changed, 33 insertions, 25 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index c1f413b366a..e1655a4c141 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -3422,31 +3422,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
interp.type = BRW_REGISTER_TYPE_F;
dest.type = BRW_REGISTER_TYPE_F;
- if (devinfo->needs_unlit_centroid_workaround &&
- bary_intrin == nir_intrinsic_load_barycentric_centroid) {
-
- /* Get the pixel/sample mask into f0 so that we know which
- * pixels are lit. Then, for each channel that is unlit,
- * replace the centroid data with non-centroid data.
- */
- bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
-
- fs_reg dest_i = offset(dest, bld, i);
- fs_reg dst_xy_pixel =
- delta_xy[brw_barycentric_mode(interp_mode,
- nir_intrinsic_load_barycentric_pixel)];
-
- fs_inst *inst;
- inst = bld.emit(FS_OPCODE_LINTERP, dest_i, dst_xy_pixel, interp);
- inst->predicate = BRW_PREDICATE_NORMAL;
- inst->predicate_inverse = true;
- inst->no_dd_clear = true;
-
- inst = bld.emit(FS_OPCODE_LINTERP, dest_i, dst_xy, interp);
- inst->predicate = BRW_PREDICATE_NORMAL;
- inst->predicate_inverse = false;
- inst->no_dd_check = true;
- } else if (devinfo->gen < 6 && interp_mode == INTERP_MODE_SMOOTH) {
+ if (devinfo->gen < 6 && interp_mode == INTERP_MODE_SMOOTH) {
fs_reg tmp = vgrf(glsl_type::float_type);
bld.emit(FS_OPCODE_LINTERP, tmp, dst_xy, interp);
bld.MUL(offset(dest, bld, i), tmp, this->pixel_w);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 6d843749f44..4e0db06ca62 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -308,9 +308,41 @@ fs_visitor::emit_interpolation_setup_gen6()
this->wpos_w = vgrf(glsl_type::float_type);
abld.emit(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
+ brw_wm_prog_data *wm_prog_data = (brw_wm_prog_data *) prog_data;
+ uint32_t centroid_modes = wm_prog_data->barycentric_interp_modes &
+ (1 << BRW_BARYCENTRIC_PERSPECTIVE_CENTROID |
+ 1 << BRW_BARYCENTRIC_NONPERSPECTIVE_CENTROID);
+
for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
uint8_t reg = payload.barycentric_coord_reg[i];
this->delta_xy[i] = fs_reg(brw_vec16_grf(reg, 0));
+
+ if (devinfo->needs_unlit_centroid_workaround &&
+ (centroid_modes & (1 << i))) {
+ /* Get the pixel/sample mask into f0 so that we know which
+ * pixels are lit. Then, for each channel that is unlit,
+ * replace the centroid data with non-centroid data.
+ */
+ bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
+
+ uint8_t pixel_reg = payload.barycentric_coord_reg[i - 1];
+
+ set_predicate_inv(BRW_PREDICATE_NORMAL, true,
+ bld.half(0).MOV(brw_vec8_grf(reg, 0),
+ brw_vec8_grf(pixel_reg, 0)));
+ set_predicate_inv(BRW_PREDICATE_NORMAL, true,
+ bld.half(0).MOV(brw_vec8_grf(reg + 1, 0),
+ brw_vec8_grf(pixel_reg + 1, 0)));
+ if (dispatch_width == 16) {
+ set_predicate_inv(BRW_PREDICATE_NORMAL, true,
+ bld.half(1).MOV(brw_vec8_grf(reg + 2, 0),
+ brw_vec8_grf(pixel_reg + 2, 0)));
+ set_predicate_inv(BRW_PREDICATE_NORMAL, true,
+ bld.half(1).MOV(brw_vec8_grf(reg + 3, 0),
+ brw_vec8_grf(pixel_reg + 3, 0)));
+ }
+ assert(dispatch_width != 32); /* not implemented yet */
+ }
}
}