aboutsummaryrefslogtreecommitdiffstats
path: root/src/intel
diff options
context:
space:
mode:
authorIago Toral Quiroga <[email protected]>2019-02-01 11:41:33 +0100
committerJuan A. Suarez Romero <[email protected]>2019-04-18 11:05:18 +0200
commit7376d57a9c6ae69bc47bbbfe5d3b1a0ed0639227 (patch)
treea5741455436867d3e82bdab21ee2bad4f458491b /src/intel
parent6ff52f0628a1d3401a3a18eb576158e4de66d044 (diff)
intel/compiler: validate region restrictions for half-float conversions
v2: - Consider implicit conversions in 2-src instructions too (Curro) - For restrictions that involve destination stride requirements, only validate them for Align1, since Align16 always requires packed data. - Skip general rule for the dst/execution type size ratio for mixed float instructions on CHV and SKL+; these have their own set of rules that will be validated separately. v3 (Curro): - Do not check src1 type in single-source instructions. - Check restriction on src1. - Remove invalid test. Reviewed-by: Francisco Jerez <[email protected]>
Diffstat (limited to 'src/intel')
-rw-r--r--src/intel/compiler/brw_eu_validate.c155
-rw-r--r--src/intel/compiler/test_eu_validate.cpp116
2 files changed, 270 insertions, 1 deletions
diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c
index bd0e48a5e5c..54bffb3af03 100644
--- a/src/intel/compiler/brw_eu_validate.c
+++ b/src/intel/compiler/brw_eu_validate.c
@@ -470,6 +470,66 @@ is_packed(unsigned vstride, unsigned width, unsigned hstride)
}
/**
+ * Returns whether an instruction is an explicit or implicit conversion
+ * to/from half-float.
+ */
+static bool
+is_half_float_conversion(const struct gen_device_info *devinfo,
+ const brw_inst *inst)
+{
+ enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
+
+ unsigned num_sources = num_sources_from_inst(devinfo, inst);
+ enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
+
+ if (dst_type != src0_type &&
+ (dst_type == BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF)) {
+ return true;
+ } else if (num_sources > 1) {
+ enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
+ return dst_type != src1_type &&
+ (dst_type == BRW_REGISTER_TYPE_HF ||
+ src1_type == BRW_REGISTER_TYPE_HF);
+ }
+
+ return false;
+}
+
+/*
+ * Returns whether an instruction is using mixed float operation mode
+ */
+static bool
+is_mixed_float(const struct gen_device_info *devinfo, const brw_inst *inst)
+{
+ if (devinfo->gen < 8)
+ return false;
+
+ if (inst_is_send(devinfo, inst))
+ return false;
+
+ unsigned opcode = brw_inst_opcode(devinfo, inst);
+ const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode);
+ if (desc->ndst == 0)
+ return false;
+
+ /* FIXME: support 3-src instructions */
+ unsigned num_sources = num_sources_from_inst(devinfo, inst);
+ assert(num_sources < 3);
+
+ enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
+ enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
+
+ if (num_sources == 1)
+ return types_are_mixed_float(src0_type, dst_type);
+
+ enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
+
+ return types_are_mixed_float(src0_type, src1_type) ||
+ types_are_mixed_float(src0_type, dst_type) ||
+ types_are_mixed_float(src1_type, dst_type);
+}
+
+/**
* Checks restrictions listed in "General Restrictions Based on Operand Types"
* in the "Register Region Restrictions" section.
*/
@@ -539,7 +599,100 @@ general_restrictions_based_on_operand_types(const struct gen_device_info *devinf
exec_type_size == 8 && dst_type_size == 4)
dst_type_size = 8;
- if (exec_type_size > dst_type_size) {
+ if (is_half_float_conversion(devinfo, inst)) {
+ /**
+ * Validate the following restriction
+ * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
+ *
+ * "There is no direct conversion from HF to DF or DF to HF.
+ * There is no direct conversion from HF to Q/UQ or Q/UQ to HF."
+ *
+ * Even if these restrictions are listed for the MOV instruction, we
+ * validate this more generally, since there is the possibility
+ * of implicit conversions from other instructions, such as implicit
+ * conversion from integer to HF with the ADD instruction in SKL+.
+ */
+ enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
+ enum brw_reg_type src1_type = num_sources > 1 ?
+ brw_inst_src1_type(devinfo, inst) : 0;
+ ERROR_IF(dst_type == BRW_REGISTER_TYPE_HF &&
+ (type_sz(src0_type) == 8 ||
+ (num_sources > 1 && type_sz(src1_type) == 8)),
+ "There are no direct conversions between 64-bit types and HF");
+
+ ERROR_IF(type_sz(dst_type) == 8 &&
+ (src0_type == BRW_REGISTER_TYPE_HF ||
+ (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)),
+ "There are no direct conversions between 64-bit types and HF");
+
+ /* From the BDW+ PRM:
+ *
+ * "Conversion between Integer and HF (Half Float) must be
+ * DWord-aligned and strided by a DWord on the destination."
+ *
+ * Also, the above restriction seems to be expanded on CHV and SKL+ by:
+ *
+ * "There is a relaxed alignment rule for word destinations. When
+ * the destination type is word (UW, W, HF), destination data types
+ * can be aligned to either the lowest word or the second lowest
+ * word of the execution channel. This means the destination data
+ * words can be either all in the even word locations or all in the
+ * odd word locations."
+ *
+ * We do not implement the second rule as is though, since empirical
+ * testing shows inconsistencies:
+ * - It suggests that packed 16-bit is not allowed, which is not true.
+ * - It suggests that conversions from Q/DF to W (which need to be
+ * 64-bit aligned on the destination) are not possible, which is
+ * not true.
+ *
+ * So from this rule we only validate the implication that conversions
+ * from F to HF need to be DWord strided (except in Align1 mixed
+ * float mode where packed fp16 destination is allowed so long as the
+ * destination is oword-aligned).
+ *
+ * Finally, we only validate this for Align1 because Align16 always
+ * requires packed destinations, so these restrictions can't possibly
+ * apply to Align16 mode.
+ */
+ if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
+ if ((dst_type == BRW_REGISTER_TYPE_HF &&
+ (brw_reg_type_is_integer(src0_type) ||
+ (num_sources > 1 && brw_reg_type_is_integer(src1_type)))) ||
+ (brw_reg_type_is_integer(dst_type) &&
+ (src0_type == BRW_REGISTER_TYPE_HF ||
+ (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)))) {
+ ERROR_IF(dst_stride * dst_type_size != 4,
+ "Conversions between integer and half-float must be "
+ "strided by a DWord on the destination");
+
+ unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
+ ERROR_IF(subreg % 4 != 0,
+ "Conversions between integer and half-float must be "
+ "aligned to a DWord on the destination");
+ } else if ((devinfo->is_cherryview || devinfo->gen >= 9) &&
+ dst_type == BRW_REGISTER_TYPE_HF) {
+ unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
+ ERROR_IF(dst_stride != 2 &&
+ !(is_mixed_float(devinfo, inst) &&
+ dst_stride == 1 && subreg % 16 == 0),
+ "Conversions to HF must have either all words in even "
+ "word locations or all words in odd word locations or "
+ "be mixed-float with Oword-aligned packed destination");
+ }
+ }
+ }
+
+ /* There are special regioning rules for mixed-float mode in CHV and SKL that
+ * override the general rule for the ratio of sizes of the destination type
+ * and the execution type. We will add validation for those in a later patch.
+ */
+ bool validate_dst_size_and_exec_size_ratio =
+ !is_mixed_float(devinfo, inst) ||
+ !(devinfo->is_cherryview || devinfo->gen >= 9);
+
+ if (validate_dst_size_and_exec_size_ratio &&
+ exec_type_size > dst_type_size) {
if (!(dst_type_is_byte && inst_is_raw_move(devinfo, inst))) {
ERROR_IF(dst_stride * dst_type_size != exec_type_size,
"Destination stride must be equal to the ratio of the sizes "
diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp
index 73300b23122..3fdbecb003b 100644
--- a/src/intel/compiler/test_eu_validate.cpp
+++ b/src/intel/compiler/test_eu_validate.cpp
@@ -848,6 +848,122 @@ TEST_P(validation_test, byte_destination_relaxed_alignment)
}
}
+TEST_P(validation_test, half_float_conversion)
+{
+ static const struct {
+ enum brw_reg_type dst_type;
+ enum brw_reg_type src_type;
+ unsigned dst_stride;
+ unsigned dst_subnr;
+ bool expected_result_bdw;
+ bool expected_result_chv_gen9;
+ } inst[] = {
+#define INST_C(dst_type, src_type, dst_stride, dst_subnr, expected_result) \
+ { \
+ BRW_REGISTER_TYPE_##dst_type, \
+ BRW_REGISTER_TYPE_##src_type, \
+ BRW_HORIZONTAL_STRIDE_##dst_stride, \
+ dst_subnr, \
+ expected_result, \
+ expected_result, \
+ }
+#define INST_S(dst_type, src_type, dst_stride, dst_subnr, \
+ expected_result_bdw, expected_result_chv_gen9) \
+ { \
+ BRW_REGISTER_TYPE_##dst_type, \
+ BRW_REGISTER_TYPE_##src_type, \
+ BRW_HORIZONTAL_STRIDE_##dst_stride, \
+ dst_subnr, \
+ expected_result_bdw, \
+ expected_result_chv_gen9, \
+ }
+
+ /* MOV to half-float destination */
+ INST_C(HF, B, 1, 0, false),
+ INST_C(HF, W, 1, 0, false),
+ INST_C(HF, HF, 1, 0, true),
+ INST_C(HF, HF, 1, 2, true),
+ INST_C(HF, D, 1, 0, false),
+ INST_S(HF, F, 1, 0, false, true),
+ INST_C(HF, Q, 1, 0, false),
+ INST_C(HF, B, 2, 0, true),
+ INST_C(HF, B, 2, 2, false),
+ INST_C(HF, W, 2, 0, true),
+ INST_C(HF, W, 2, 2, false),
+ INST_C(HF, HF, 2, 0, true),
+ INST_C(HF, HF, 2, 2, true),
+ INST_C(HF, D, 2, 0, true),
+ INST_C(HF, D, 2, 2, false),
+ INST_C(HF, F, 2, 0, true),
+ INST_S(HF, F, 2, 2, false, true),
+ INST_C(HF, Q, 2, 0, false),
+ INST_C(HF, DF, 2, 0, false),
+ INST_C(HF, B, 4, 0, false),
+ INST_C(HF, W, 4, 0, false),
+ INST_C(HF, HF, 4, 0, true),
+ INST_C(HF, HF, 4, 2, true),
+ INST_C(HF, D, 4, 0, false),
+ INST_C(HF, F, 4, 0, false),
+ INST_C(HF, Q, 4, 0, false),
+ INST_C(HF, DF, 4, 0, false),
+
+ /* MOV from half-float source */
+ INST_C( B, HF, 1, 0, false),
+ INST_C( W, HF, 1, 0, false),
+ INST_C( D, HF, 1, 0, true),
+ INST_C( D, HF, 1, 4, true),
+ INST_C( F, HF, 1, 0, true),
+ INST_C( F, HF, 1, 4, true),
+ INST_C( Q, HF, 1, 0, false),
+ INST_C(DF, HF, 1, 0, false),
+ INST_C( B, HF, 2, 0, false),
+ INST_C( W, HF, 2, 0, true),
+ INST_C( W, HF, 2, 2, false),
+ INST_C( D, HF, 2, 0, false),
+ INST_C( F, HF, 2, 0, true),
+ INST_C( B, HF, 4, 0, true),
+ INST_C( B, HF, 4, 1, false),
+ INST_C( W, HF, 4, 0, false),
+
+#undef INST_C
+#undef INST_S
+ };
+
+ if (devinfo.gen < 8)
+ return;
+
+ for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) {
+ if (!devinfo.has_64bit_types &&
+ (type_sz(inst[i].src_type) == 8 || type_sz(inst[i].dst_type) == 8)) {
+ continue;
+ }
+
+ brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
+
+ brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
+
+ brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
+ brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
+
+ if (inst[i].src_type == BRW_REGISTER_TYPE_B) {
+ brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
+ brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2);
+ brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
+ } else {
+ brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
+ brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
+ brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
+ }
+
+ if (devinfo.is_cherryview || devinfo.gen >= 9)
+ EXPECT_EQ(inst[i].expected_result_chv_gen9, validate(p));
+ else
+ EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
+
+ clear_instructions(p);
+ }
+}
+
TEST_P(validation_test, vector_immediate_destination_alignment)
{
static const struct {