summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp93
1 files changed, 76 insertions, 17 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index ae34642f19a..f4f435997fc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -735,23 +735,82 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
break;
case nir_op_fsign: {
- /* AND(val, 0x80000000) gives the sign bit.
- *
- * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
- * zero.
- */
- bld.CMP(bld.null_reg_f(), op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ);
-
- fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
- op[0].type = BRW_REGISTER_TYPE_UD;
- result.type = BRW_REGISTER_TYPE_UD;
- bld.AND(result_int, op[0], brw_imm_ud(0x80000000u));
-
- inst = bld.OR(result_int, result_int, brw_imm_ud(0x3f800000u));
- inst->predicate = BRW_PREDICATE_NORMAL;
- if (instr->dest.saturate) {
- inst = bld.MOV(result, result);
- inst->saturate = true;
+ if (type_sz(op[0].type) < 8) {
+ /* AND(val, 0x80000000) gives the sign bit.
+ *
+ * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
+ * zero.
+ */
+ bld.CMP(bld.null_reg_f(), op[0], brw_imm_f(0.0f), BRW_CONDITIONAL_NZ);
+
+ fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
+ op[0].type = BRW_REGISTER_TYPE_UD;
+ result.type = BRW_REGISTER_TYPE_UD;
+ bld.AND(result_int, op[0], brw_imm_ud(0x80000000u));
+
+ inst = bld.OR(result_int, result_int, brw_imm_ud(0x3f800000u));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ if (instr->dest.saturate) {
+ inst = bld.MOV(result, result);
+ inst->saturate = true;
+ }
+ } else {
+ /* For doubles we do the same but we need to consider:
+ *
+ * - 2-src instructions can't operate with 64-bit immediates
+ * - The sign is encoded in the high 32 bits of each DF
+ * - CMP with DF requires special handling in SIMD16
+ * - We need to produce a DF result.
+ */
+
+ /* 2-src instructions can't have 64-bit immediates, so put 0.0 in
+ * a register and compare with that.
+ */
+ fs_reg tmp = vgrf(glsl_type::double_type);
+ bld.MOV(tmp, brw_imm_df(0.0));
+
+ /* A direct DF CMP using the flag register (null dst) won't work in
+ * SIMD16 because the CMP will be split in two by lower_simd_width,
+ * resulting in two CMP instructions with the same dst (NULL),
+ * leading to dead code elimination of the first one. To avoid that
+ * we always write the CMP result to a vgrf. In SIMD8 the CMP is not
+ * split, so no extra comparison is emitted after this one.
+ */
+ fs_reg dst_tmp = vgrf(glsl_type::double_type);
+ bld.CMP(dst_tmp, op[0], tmp, BRW_CONDITIONAL_NZ);
+
+ /* In SIMD16 we want to avoid using a NULL dst register with DF CMP,
+ * so we store the result of the comparison in a vgrf instead and
+ * then generate a UD comparison from that result, which won't have
+ * to be split by lower_simd_width. This is what NIR does to handle
+ * double comparisons in the general case.
+ */
+ if (bld.dispatch_width() == 16 ) {
+ fs_reg dst_tmp_ud = retype(dst_tmp, BRW_REGISTER_TYPE_UD);
+ bld.MOV(dst_tmp_ud, subscript(dst_tmp, BRW_REGISTER_TYPE_UD, 0));
+ bld.CMP(bld.null_reg_ud(),
+ dst_tmp_ud, brw_imm_ud(0), BRW_CONDITIONAL_NZ);
+ }
+
+ /* Get the high 32 bits of each double component, where the sign bit is */
+ fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
+ bld.MOV(result_int, subscript(op[0], BRW_REGISTER_TYPE_UD, 1));
+
+ /* Get the sign bit */
+ bld.AND(result_int, result_int, brw_imm_ud(0x80000000u));
+
+ /* OR in 1.0 (0x3f800000), predicated to skip the case of op[0] == 0.0 */
+ inst = bld.OR(result_int, result_int, brw_imm_ud(0x3f800000u));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+
+ /* Convert from 32-bit float to 64-bit double */
+ result.type = BRW_REGISTER_TYPE_DF;
+ inst = bld.MOV(result, retype(result_int, BRW_REGISTER_TYPE_F));
+
+ if (instr->dest.saturate) {
+ inst = bld.MOV(result, result);
+ inst->saturate = true;
+ }
}
break;
}