diff options
author | Iago Toral Quiroga <[email protected]> | 2019-02-06 09:13:22 +0100 |
---|---|---|
committer | Juan A. Suarez Romero <[email protected]> | 2019-04-18 13:22:46 +0200 |
commit | 8ed6d74c922e967732bb6f8b4e39bdcedd46e544 (patch) | |
tree | 5668e68907b108619d8176f40506c77e2499345f /src | |
parent | 58d6417e591db3f440c4a1c06c9cfdfae2a06dfb (diff) |
intel/compiler: validate region restrictions for mixed float mode
v2:
- Adapted unit tests to make them consistent with the changes done
to the validation of half-float conversions.
v3 (Curro):
- Check all the accummulators
- Constify declarations
- Do not check src1 type in single-source instructions.
- Check for all instructions that read accumulator (either implicitly or
explicitly)
- Check restrictions in src1 too.
- Merge conditional block
- Add invalid test case.
v4 (Curro):
- Assert on 3-src instructions, as they are not validated.
- Get rid of types_are_mixed_float(), as we know instruction is mixed
float at that point.
- Remove conditions from not verified case.
- Fix brackets on conditional.
Reviewed-by: Francisco Jerez <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/intel/compiler/brw_eu_validate.c | 250 | ||||
-rw-r--r-- | src/intel/compiler/test_eu_validate.cpp | 630 |
2 files changed, 880 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c index cfaf126e2f5..943f724e60f 100644 --- a/src/intel/compiler/brw_eu_validate.c +++ b/src/intel/compiler/brw_eu_validate.c @@ -171,6 +171,20 @@ src1_is_null(const struct gen_device_info *devinfo, const brw_inst *inst) } static bool +src0_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst) +{ + return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && + (brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR; +} + +static bool +src1_is_acc(const struct gen_device_info *devinfo, const brw_inst *inst) +{ + return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && + (brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR; +} + +static bool src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst) { return brw_inst_src0_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE; @@ -275,6 +289,24 @@ sources_not_null(const struct gen_device_info *devinfo, return error_msg; } +static bool +inst_uses_src_acc(const struct gen_device_info *devinfo, const brw_inst *inst) +{ + /* Check instructions that use implicit accumulator sources */ + switch (brw_inst_opcode(devinfo, inst)) { + case BRW_OPCODE_MAC: + case BRW_OPCODE_MACH: + case BRW_OPCODE_SADA2: + return true; + } + + /* FIXME: support 3-src instructions */ + unsigned num_sources = num_sources_from_inst(devinfo, inst); + assert(num_sources < 3); + + return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst)); +} + static struct string send_restrictions(const struct gen_device_info *devinfo, const brw_inst *inst) @@ -938,6 +970,223 @@ general_restrictions_on_region_parameters(const struct gen_device_info *devinfo, return error_msg; } +static struct string +special_restrictions_for_mixed_float_mode(const struct gen_device_info *devinfo, + const brw_inst *inst) +{ + struct string error_msg = { .str = NULL, .len = 0 }; + + const unsigned opcode = brw_inst_opcode(devinfo, inst); + const unsigned num_sources = num_sources_from_inst(devinfo, inst); + if (num_sources >= 3) + return error_msg; + + if (!is_mixed_float(devinfo, inst)) + return error_msg; + + unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); + bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16; + + enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); + enum brw_reg_type src1_type = num_sources > 1 ? + brw_inst_src1_type(devinfo, inst) : 0; + enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); + + unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); + bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride); + + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "Indirect addressing on source is not supported when source and + * destination data types are mixed float." + */ + ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT || + (num_sources > 1 && + brw_inst_src1_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT), + "Indirect addressing on source is not supported when source and " + "destination data types are mixed float"); + + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "No SIMD16 in mixed mode when destination is f32. Instruction + * execution size must be no more than 8." + */ + ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F, + "Mixed float mode with 32-bit float destination is limited " + "to SIMD8"); + + if (is_align16) { + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "In Align16 mode, when half float and float data types are mixed + * between source operands OR between source and destination operands, + * the register content are assumed to be packed." + * + * Since Align16 doesn't have a concept of horizontal stride (or width), + * it means that vertical stride must always be 4, since 0 and 2 would + * lead to replicated data, and any other value is disallowed in Align16. + */ + ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, + "Align16 mixed float mode assumes packed data (vstride must be 4"); + + ERROR_IF(num_sources >= 2 && + brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, + "Align16 mixed float mode assumes packed data (vstride must be 4"); + + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "For Align16 mixed mode, both input and output packed f16 data + * must be oword aligned, no oword crossing in packed f16." + * + * The previous rule requires that Align16 operands are always packed, + * and since there is only one bit for Align16 subnr, which represents + * offsets 0B and 16B, this rule is always enforced and we don't need to + * validate it. + */ + + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "No SIMD16 in mixed mode when destination is packed f16 for both + * Align1 and Align16." + * + * And: + * + * "In Align16 mode, when half float and float data types are mixed + * between source operands OR between source and destination operands, + * the register content are assumed to be packed." + * + * Which implies that SIMD16 is not available in Align16. This is further + * confirmed by: + * + * "For Align16 mixed mode, both input and output packed f16 data + * must be oword aligned, no oword crossing in packed f16" + * + * Since oword-aligned packed f16 data would cross oword boundaries when + * the execution size is larger than 8. + */ + ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8"); + + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "No accumulator read access for Align16 mixed float." + */ + ERROR_IF(inst_uses_src_acc(devinfo, inst), + "No accumulator read access for Align16 mixed float"); + } else { + assert(!is_align16); + + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "No SIMD16 in mixed mode when destination is packed f16 for both + * Align1 and Align16." + */ + ERROR_IF(exec_size > 8 && dst_is_packed && + dst_type == BRW_REGISTER_TYPE_HF, + "Align1 mixed float mode is limited to SIMD8 when destination " + "is packed half-float"); + + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "Math operations for mixed mode: + * - In Align1, f16 inputs need to be strided" + */ + if (opcode == BRW_OPCODE_MATH) { + if (src0_type == BRW_REGISTER_TYPE_HF) { + ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1, + "Align1 mixed mode math needs strided half-float inputs"); + } + + if (num_sources >= 2 && src1_type == BRW_REGISTER_TYPE_HF) { + ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1, + "Align1 mixed mode math needs strided half-float inputs"); + } + } + + if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) { + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "In Align1, destination stride can be smaller than execution + * type. When destination is stride of 1, 16 bit packed data is + * updated on the destination. However, output packed f16 data + * must be oword aligned, no oword crossing in packed f16." + * + * The requirement of not crossing oword boundaries for 16-bit oword + * aligned data means that execution size is limited to 8. + */ + unsigned subreg; + if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) + subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); + else + subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst); + ERROR_IF(subreg % 16 != 0, + "Align1 mixed mode packed half-float output must be " + "oword aligned"); + ERROR_IF(exec_size > 8, + "Align1 mixed mode packed half-float output must not " + "cross oword boundaries (max exec size is 8)"); + + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "When source is float or half float from accumulator register and + * destination is half float with a stride of 1, the source must + * register aligned. i.e., source must have offset zero." + * + * Align16 mixed float mode doesn't allow accumulator access on sources, + * so we only need to check this for Align1. + */ + if (src0_is_acc(devinfo, inst) && + (src0_type == BRW_REGISTER_TYPE_F || + src0_type == BRW_REGISTER_TYPE_HF)) { + ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0, + "Mixed float mode requires register-aligned accumulator " + "source reads when destination is packed half-float"); + + } + + if (num_sources > 1 && + src1_is_acc(devinfo, inst) && + (src1_type == BRW_REGISTER_TYPE_F || + src1_type == BRW_REGISTER_TYPE_HF)) { + ERROR_IF(brw_inst_src1_da1_subreg_nr(devinfo, inst) != 0, + "Mixed float mode requires register-aligned accumulator " + "source reads when destination is packed half-float"); + } + } + + /* From the SKL PRM, Special Restrictions for Handling Mixed Mode + * Float Operations: + * + * "No swizzle is allowed when an accumulator is used as an implicit + * source or an explicit source in an instruction. i.e. when + * destination is half float with an implicit accumulator source, + * destination stride needs to be 2." + * + * FIXME: it is not quite clear what the first sentence actually means + * or its link to the implication described after it, so we only + * validate the explicit implication, which is clearly described. + */ + if (dst_type == BRW_REGISTER_TYPE_HF && + inst_uses_src_acc(devinfo, inst)) { + ERROR_IF(dst_stride != 2, + "Mixed float mode with implicit/explicit accumulator " + "source and half-float destination requires a stride " + "of 2 on the destination"); + } + } + + return error_msg; +} + /** * Creates an \p access_mask for an \p exec_size, \p element_size, and a region * @@ -1576,6 +1825,7 @@ brw_validate_instructions(const struct gen_device_info *devinfo, CHECK(send_restrictions); CHECK(general_restrictions_based_on_operand_types); CHECK(general_restrictions_on_region_parameters); + CHECK(special_restrictions_for_mixed_float_mode); CHECK(region_alignment_rules); CHECK(vector_immediate_restrictions); CHECK(special_requirements_for_handling_double_precision_data_types); diff --git a/src/intel/compiler/test_eu_validate.cpp b/src/intel/compiler/test_eu_validate.cpp index 2e06da2f5b4..65326416064 100644 --- a/src/intel/compiler/test_eu_validate.cpp +++ b/src/intel/compiler/test_eu_validate.cpp @@ -1019,6 +1019,636 @@ TEST_P(validation_test, half_float_conversion) } } +TEST_P(validation_test, mixed_float_source_indirect_addressing) +{ + static const struct { + enum brw_reg_type dst_type; + enum brw_reg_type src0_type; + enum brw_reg_type src1_type; + unsigned dst_stride; + bool dst_indirect; + bool src0_indirect; + bool expected_result; + } inst[] = { +#define INST(dst_type, src0_type, src1_type, \ + dst_stride, dst_indirect, src0_indirect, expected_result) \ + { \ + BRW_REGISTER_TYPE_##dst_type, \ + BRW_REGISTER_TYPE_##src0_type, \ + BRW_REGISTER_TYPE_##src1_type, \ + BRW_HORIZONTAL_STRIDE_##dst_stride, \ + dst_indirect, \ + src0_indirect, \ + expected_result, \ + } + + /* Source and dest are mixed float: indirect src addressing not allowed */ + INST(HF, F, F, 2, false, false, true), + INST(HF, F, F, 2, true, false, true), + INST(HF, F, F, 2, false, true, false), + INST(HF, F, F, 2, true, true, false), + INST( F, HF, F, 1, false, false, true), + INST( F, HF, F, 1, true, false, true), + INST( F, HF, F, 1, false, true, false), + INST( F, HF, F, 1, true, true, false), + + INST(HF, HF, F, 2, false, false, true), + INST(HF, HF, F, 2, true, false, true), + INST(HF, HF, F, 2, false, true, false), + INST(HF, HF, F, 2, true, true, false), + INST( F, F, HF, 1, false, false, true), + INST( F, F, HF, 1, true, false, true), + INST( F, F, HF, 1, false, true, false), + INST( F, F, HF, 1, true, true, false), + +#undef INST + }; + + if (devinfo.gen < 8) + return; + + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { + brw_ADD(p, retype(g0, inst[i].dst_type), + retype(g0, inst[i].src0_type), + retype(g0, inst[i].src1_type)); + + brw_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_indirect); + brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); + brw_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src0_indirect); + + EXPECT_EQ(inst[i].expected_result, validate(p)); + + clear_instructions(p); + } +} + +TEST_P(validation_test, mixed_float_align1_simd16) +{ + static const struct { + unsigned exec_size; + enum brw_reg_type dst_type; + enum brw_reg_type src0_type; + enum brw_reg_type src1_type; + unsigned dst_stride; + bool expected_result; + } inst[] = { +#define INST(exec_size, dst_type, src0_type, src1_type, \ + dst_stride, expected_result) \ + { \ + BRW_EXECUTE_##exec_size, \ + BRW_REGISTER_TYPE_##dst_type, \ + BRW_REGISTER_TYPE_##src0_type, \ + BRW_REGISTER_TYPE_##src1_type, \ + BRW_HORIZONTAL_STRIDE_##dst_stride, \ + expected_result, \ + } + + /* No SIMD16 in mixed mode when destination is packed f16 */ + INST( 8, HF, F, HF, 2, true), + INST(16, HF, HF, F, 2, true), + INST(16, HF, HF, F, 1, false), + INST(16, HF, F, HF, 1, false), + + /* No SIMD16 in mixed mode when destination is f32 */ + INST( 8, F, HF, F, 1, true), + INST( 8, F, F, HF, 1, true), + INST(16, F, HF, F, 1, false), + INST(16, F, F, HF, 1, false), + +#undef INST + }; + + if (devinfo.gen < 8) + return; + + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { + brw_ADD(p, retype(g0, inst[i].dst_type), + retype(g0, inst[i].src0_type), + retype(g0, inst[i].src1_type)); + + brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size); + + brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); + + EXPECT_EQ(inst[i].expected_result, validate(p)); + + clear_instructions(p); + } +} + +TEST_P(validation_test, mixed_float_align1_packed_fp16_dst_acc_read_offset_0) +{ + static const struct { + enum brw_reg_type dst_type; + enum brw_reg_type src0_type; + enum brw_reg_type src1_type; + unsigned dst_stride; + bool read_acc; + unsigned subnr; + bool expected_result_bdw; + bool expected_result_chv_skl; + } inst[] = { +#define INST(dst_type, src0_type, src1_type, dst_stride, read_acc, subnr, \ + expected_result_bdw, expected_result_chv_skl) \ + { \ + BRW_REGISTER_TYPE_##dst_type, \ + BRW_REGISTER_TYPE_##src0_type, \ + BRW_REGISTER_TYPE_##src1_type, \ + BRW_HORIZONTAL_STRIDE_##dst_stride, \ + read_acc, \ + subnr, \ + expected_result_bdw, \ + expected_result_chv_skl, \ + } + + /* Destination is not packed */ + INST(HF, HF, F, 2, true, 0, true, true), + INST(HF, HF, F, 2, true, 2, true, true), + INST(HF, HF, F, 2, true, 4, true, true), + INST(HF, HF, F, 2, true, 8, true, true), + INST(HF, HF, F, 2, true, 16, true, true), + + /* Destination is packed, we don't read acc */ + INST(HF, HF, F, 1, false, 0, false, true), + INST(HF, HF, F, 1, false, 2, false, true), + INST(HF, HF, F, 1, false, 4, false, true), + INST(HF, HF, F, 1, false, 8, false, true), + INST(HF, HF, F, 1, false, 16, false, true), + + /* Destination is packed, we read acc */ + INST(HF, HF, F, 1, true, 0, false, false), + INST(HF, HF, F, 1, true, 2, false, false), + INST(HF, HF, F, 1, true, 4, false, false), + INST(HF, HF, F, 1, true, 8, false, false), + INST(HF, HF, F, 1, true, 16, false, false), + +#undef INST + }; + + if (devinfo.gen < 8) + return; + + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { + brw_ADD(p, retype(g0, inst[i].dst_type), + retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type), + retype(g0, inst[i].src1_type)); + + brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); + + brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].subnr); + + if (devinfo.is_cherryview || devinfo.gen >= 9) + EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p)); + else + EXPECT_EQ(inst[i].expected_result_bdw, validate(p)); + + clear_instructions(p); + } +} + +TEST_P(validation_test, mixed_float_fp16_dest_with_acc) +{ + static const struct { + unsigned exec_size; + unsigned opcode; + enum brw_reg_type dst_type; + enum brw_reg_type src0_type; + enum brw_reg_type src1_type; + unsigned dst_stride; + bool read_acc; + bool expected_result_bdw; + bool expected_result_chv_skl; + } inst[] = { +#define INST(exec_size, opcode, dst_type, src0_type, src1_type, \ + dst_stride, read_acc,expected_result_bdw, \ + expected_result_chv_skl) \ + { \ + BRW_EXECUTE_##exec_size, \ + BRW_OPCODE_##opcode, \ + BRW_REGISTER_TYPE_##dst_type, \ + BRW_REGISTER_TYPE_##src0_type, \ + BRW_REGISTER_TYPE_##src1_type, \ + BRW_HORIZONTAL_STRIDE_##dst_stride, \ + read_acc, \ + expected_result_bdw, \ + expected_result_chv_skl, \ + } + + /* Packed fp16 dest with implicit acc needs hstride=2 */ + INST(8, MAC, HF, HF, F, 1, false, false, false), + INST(8, MAC, HF, HF, F, 2, false, true, true), + INST(8, MAC, HF, F, HF, 1, false, false, false), + INST(8, MAC, HF, F, HF, 2, false, true, true), + + /* Packed fp16 dest with explicit acc needs hstride=2 */ + INST(8, ADD, HF, HF, F, 1, true, false, false), + INST(8, ADD, HF, HF, F, 2, true, true, true), + INST(8, ADD, HF, F, HF, 1, true, false, false), + INST(8, ADD, HF, F, HF, 2, true, true, true), + + /* If destination is not fp16, restriction doesn't apply */ + INST(8, MAC, F, HF, F, 1, false, true, true), + INST(8, MAC, F, HF, F, 2, false, true, true), + + /* If there is no implicit/explicit acc, restriction doesn't apply */ + INST(8, ADD, HF, HF, F, 1, false, false, true), + INST(8, ADD, HF, HF, F, 2, false, true, true), + INST(8, ADD, HF, F, HF, 1, false, false, true), + INST(8, ADD, HF, F, HF, 2, false, true, true), + INST(8, ADD, F, HF, F, 1, false, true, true), + INST(8, ADD, F, HF, F, 2, false, true, true), + +#undef INST + }; + + if (devinfo.gen < 8) + return; + + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { + if (inst[i].opcode == BRW_OPCODE_MAC) { + brw_MAC(p, retype(g0, inst[i].dst_type), + retype(g0, inst[i].src0_type), + retype(g0, inst[i].src1_type)); + } else { + assert(inst[i].opcode == BRW_OPCODE_ADD); + brw_ADD(p, retype(g0, inst[i].dst_type), + retype(inst[i].read_acc ? acc0: g0, inst[i].src0_type), + retype(g0, inst[i].src1_type)); + } + + brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size); + + brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); + + if (devinfo.is_cherryview || devinfo.gen >= 9) + EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p)); + else + EXPECT_EQ(inst[i].expected_result_bdw, validate(p)); + + clear_instructions(p); + } +} + +TEST_P(validation_test, mixed_float_align1_math_strided_fp16_inputs) +{ + static const struct { + enum brw_reg_type dst_type; + enum brw_reg_type src0_type; + enum brw_reg_type src1_type; + unsigned dst_stride; + unsigned src0_stride; + unsigned src1_stride; + bool expected_result; + } inst[] = { +#define INST(dst_type, src0_type, src1_type, \ + dst_stride, src0_stride, src1_stride, expected_result) \ + { \ + BRW_REGISTER_TYPE_##dst_type, \ + BRW_REGISTER_TYPE_##src0_type, \ + BRW_REGISTER_TYPE_##src1_type, \ + BRW_HORIZONTAL_STRIDE_##dst_stride, \ + BRW_HORIZONTAL_STRIDE_##src0_stride, \ + BRW_HORIZONTAL_STRIDE_##src1_stride, \ + expected_result, \ + } + + INST(HF, HF, F, 2, 2, 1, true), + INST(HF, F, HF, 2, 1, 2, true), + INST(HF, F, HF, 1, 1, 2, true), + INST(HF, F, HF, 2, 1, 1, false), + INST(HF, HF, F, 2, 1, 1, false), + INST(HF, HF, F, 1, 1, 1, false), + INST(HF, HF, F, 2, 1, 1, false), + INST( F, HF, F, 1, 1, 1, false), + INST( F, F, HF, 1, 1, 2, true), + INST( F, HF, HF, 1, 2, 1, false), + INST( F, HF, HF, 1, 2, 2, true), + +#undef INST + }; + + /* No half-float math in gen8 */ + if (devinfo.gen < 9) + return; + + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { + gen6_math(p, retype(g0, inst[i].dst_type), + BRW_MATH_FUNCTION_POW, + retype(g0, inst[i].src0_type), + retype(g0, inst[i].src1_type)); + + brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); + + brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); + brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4); + brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src0_stride); + + brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); + brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4); + brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].src1_stride); + + EXPECT_EQ(inst[i].expected_result, validate(p)); + + clear_instructions(p); + } +} + +TEST_P(validation_test, mixed_float_align1_packed_fp16_dst) +{ + static const struct { + unsigned exec_size; + enum brw_reg_type dst_type; + enum brw_reg_type src0_type; + enum brw_reg_type src1_type; + unsigned dst_stride; + unsigned dst_subnr; + bool expected_result_bdw; + bool expected_result_chv_skl; + } inst[] = { +#define INST(exec_size, dst_type, src0_type, src1_type, dst_stride, dst_subnr, \ + expected_result_bdw, expected_result_chv_skl) \ + { \ + BRW_EXECUTE_##exec_size, \ + BRW_REGISTER_TYPE_##dst_type, \ + BRW_REGISTER_TYPE_##src0_type, \ + BRW_REGISTER_TYPE_##src1_type, \ + BRW_HORIZONTAL_STRIDE_##dst_stride, \ + dst_subnr, \ + expected_result_bdw, \ + expected_result_chv_skl \ + } + + /* SIMD8 packed fp16 dst won't cross oword boundaries if region is + * oword-aligned + */ + INST( 8, HF, HF, F, 1, 0, false, true), + INST( 8, HF, HF, F, 1, 2, false, false), + INST( 8, HF, HF, F, 1, 4, false, false), + INST( 8, HF, HF, F, 1, 8, false, false), + INST( 8, HF, HF, F, 1, 16, false, true), + + /* SIMD16 packed fp16 always crosses oword boundaries */ + INST(16, HF, HF, F, 1, 0, false, false), + INST(16, HF, HF, F, 1, 2, false, false), + INST(16, HF, HF, F, 1, 4, false, false), + INST(16, HF, HF, F, 1, 8, false, false), + INST(16, HF, HF, F, 1, 16, false, false), + + /* If destination is not packed (or not fp16) we can cross oword + * boundaries + */ + INST( 8, HF, HF, F, 2, 0, true, true), + INST( 8, F, HF, F, 1, 0, true, true), + +#undef INST + }; + + if (devinfo.gen < 8) + return; + + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { + brw_ADD(p, retype(g0, inst[i].dst_type), + retype(g0, inst[i].src0_type), + retype(g0, inst[i].src1_type)); + + brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride); + brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr); + + brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); + brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4); + brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); + + brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); + brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4); + brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1); + + brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size); + + if (devinfo.is_cherryview || devinfo.gen >= 9) + EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p)); + else + EXPECT_EQ(inst[i].expected_result_bdw, validate(p)); + + clear_instructions(p); + } +} + +TEST_P(validation_test, mixed_float_align16_packed_data) +{ + static const struct { + enum brw_reg_type dst_type; + enum brw_reg_type src0_type; + enum brw_reg_type src1_type; + unsigned src0_vstride; + unsigned src1_vstride; + bool expected_result; + } inst[] = { +#define INST(dst_type, src0_type, src1_type, \ + src0_vstride, src1_vstride, expected_result) \ + { \ + BRW_REGISTER_TYPE_##dst_type, \ + BRW_REGISTER_TYPE_##src0_type, \ + BRW_REGISTER_TYPE_##src1_type, \ + BRW_VERTICAL_STRIDE_##src0_vstride, \ + BRW_VERTICAL_STRIDE_##src1_vstride, \ + expected_result, \ + } + + /* We only test with F destination because there is a restriction + * by which F->HF conversions need to be DWord aligned but Align16 also + * requires that destination horizontal stride is 1. + */ + INST(F, F, HF, 4, 4, true), + INST(F, F, HF, 2, 4, false), + INST(F, F, HF, 4, 2, false), + INST(F, F, HF, 0, 4, false), + INST(F, F, HF, 4, 0, false), + INST(F, HF, F, 4, 4, true), + INST(F, HF, F, 4, 2, false), + INST(F, HF, F, 2, 4, false), + INST(F, HF, F, 0, 4, false), + INST(F, HF, F, 4, 0, false), + +#undef INST + }; + + if (devinfo.gen < 8 || devinfo.gen >= 11) + return; + + brw_set_default_access_mode(p, BRW_ALIGN_16); + + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { + brw_ADD(p, retype(g0, inst[i].dst_type), + retype(g0, inst[i].src0_type), + retype(g0, inst[i].src1_type)); + + brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride); + brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride); + + EXPECT_EQ(inst[i].expected_result, validate(p)); + + clear_instructions(p); + } +} + +TEST_P(validation_test, mixed_float_align16_no_simd16) +{ + static const struct { + unsigned exec_size; + enum brw_reg_type dst_type; + enum brw_reg_type src0_type; + enum brw_reg_type src1_type; + bool expected_result; + } inst[] = { +#define INST(exec_size, dst_type, src0_type, src1_type, expected_result) \ + { \ + BRW_EXECUTE_##exec_size, \ + BRW_REGISTER_TYPE_##dst_type, \ + BRW_REGISTER_TYPE_##src0_type, \ + BRW_REGISTER_TYPE_##src1_type, \ + expected_result, \ + } + + /* We only test with F destination because there is a restriction + * by which F->HF conversions need to be DWord aligned but Align16 also + * requires that destination horizontal stride is 1. + */ + INST( 8, F, F, HF, true), + INST( 8, F, HF, F, true), + INST( 8, F, F, HF, true), + INST(16, F, F, HF, false), + INST(16, F, HF, F, false), + INST(16, F, F, HF, false), + +#undef INST + }; + + if (devinfo.gen < 8 || devinfo.gen >= 11) + return; + + brw_set_default_access_mode(p, BRW_ALIGN_16); + + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { + brw_ADD(p, retype(g0, inst[i].dst_type), + retype(g0, inst[i].src0_type), + retype(g0, inst[i].src1_type)); + + brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size); + + brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); + brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); + + EXPECT_EQ(inst[i].expected_result, validate(p)); + + clear_instructions(p); + } +} + +TEST_P(validation_test, mixed_float_align16_no_acc_read) +{ + static const struct { + enum brw_reg_type dst_type; + enum brw_reg_type src0_type; + enum brw_reg_type src1_type; + bool read_acc; + bool expected_result; + } inst[] = { +#define INST(dst_type, src0_type, src1_type, read_acc, expected_result) \ + { \ + BRW_REGISTER_TYPE_##dst_type, \ + BRW_REGISTER_TYPE_##src0_type, \ + BRW_REGISTER_TYPE_##src1_type, \ + read_acc, \ + expected_result, \ + } + + /* We only test with F destination because there is a restriction + * by which F->HF conversions need to be DWord aligned but Align16 also + * requires that destination horizontal stride is 1. + */ + INST( F, F, HF, false, true), + INST( F, F, HF, true, false), + INST( F, HF, F, false, true), + INST( F, HF, F, true, false), + +#undef INST + }; + + if (devinfo.gen < 8 || devinfo.gen >= 11) + return; + + brw_set_default_access_mode(p, BRW_ALIGN_16); + + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { + brw_ADD(p, retype(g0, inst[i].dst_type), + retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type), + retype(g0, inst[i].src1_type)); + + brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); + brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4); + + EXPECT_EQ(inst[i].expected_result, validate(p)); + + clear_instructions(p); + } +} + +TEST_P(validation_test, mixed_float_align16_math_packed_format) +{ + static const struct { + enum brw_reg_type dst_type; + enum brw_reg_type src0_type; + enum brw_reg_type src1_type; + unsigned src0_vstride; + unsigned src1_vstride; + bool expected_result; + } inst[] = { +#define INST(dst_type, src0_type, src1_type, \ + src0_vstride, src1_vstride, expected_result) \ + { \ + BRW_REGISTER_TYPE_##dst_type, \ + BRW_REGISTER_TYPE_##src0_type, \ + BRW_REGISTER_TYPE_##src1_type, \ + BRW_VERTICAL_STRIDE_##src0_vstride, \ + BRW_VERTICAL_STRIDE_##src1_vstride, \ + expected_result, \ + } + + /* We only test with F destination because there is a restriction + * by which F->HF conversions need to be DWord aligned but Align16 also + * requires that destination horizontal stride is 1. + */ + INST( F, HF, F, 4, 0, false), + INST( F, HF, HF, 4, 4, true), + INST( F, F, HF, 4, 0, false), + INST( F, F, HF, 2, 4, false), + INST( F, F, HF, 4, 2, false), + INST( F, HF, HF, 0, 4, false), + +#undef INST + }; + + /* Align16 Math for mixed float mode is not supported in gen8 */ + if (devinfo.gen < 9 || devinfo.gen >= 11) + return; + + brw_set_default_access_mode(p, BRW_ALIGN_16); + + for (unsigned i = 0; i < sizeof(inst) / sizeof(inst[0]); i++) { + gen6_math(p, retype(g0, inst[i].dst_type), + BRW_MATH_FUNCTION_POW, + retype(g0, inst[i].src0_type), + retype(g0, inst[i].src1_type)); + + brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride); + brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride); + + EXPECT_EQ(inst[i].expected_result, validate(p)); + + clear_instructions(p); + } +} + TEST_P(validation_test, vector_immediate_destination_alignment) { static const struct { |