author | Jason Ekstrand <[email protected]> | 2017-02-28 09:10:43 -0800
---|---|---
committer | Emil Velikov <[email protected]> | 2017-03-13 11:16:34 +0000
commit | 700bebb958e93f4d472c383de62ced9db8e64bec (patch) |
tree | 0075c098c56c338f38ba0db80b9dba3e7e268a17 /src/intel/compiler/brw_eu_validate.c |
parent | d0d4a5f43b4dd79bd7bfff7c7deaade10bfebf7c (diff) |
i965: Move the back-end compiler to src/intel/compiler
Mostly a dummy git mv with a couple of noticeable parts:
- With the earlier header cleanups, nothing in src/intel depends on
files from src/mesa/drivers/dri/i965/
- Both Autoconf and Android builds are addressed. Thanks to Mauro and
Tapani for the fixups in the latter
- brw_util.[ch] is not really compiler specific, so it's moved to i965.
v2:
- move brw_eu_defines.h instead of brw_defines.h
- remove no-longer applicable includes
- add missing vulkan/ prefix in the Android build (thanks Tapani)
v3:
- don't list brw_defines.h in src/intel/Makefile.sources (Jason)
- rebase on top of the oa patches
[Emil Velikov: commit message, various small fixes throughout]
Signed-off-by: Emil Velikov <[email protected]>
Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src/intel/compiler/brw_eu_validate.c')
-rw-r--r-- | src/intel/compiler/brw_eu_validate.c | 1051 |
1 file changed, 1051 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_eu_validate.c b/src/intel/compiler/brw_eu_validate.c
new file mode 100644
index 00000000000..64615af44ac
--- /dev/null
+++ b/src/intel/compiler/brw_eu_validate.c
@@ -0,0 +1,1051 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file brw_eu_validate.c
+ *
+ * This file implements a pass that validates shader assembly.
+ */
+
+#include "brw_eu.h"
+
+/* We're going to do lots of string concatenation, so this should help. */
+struct string {
+   char *str;
+   size_t len;
+};
+
+static void
+cat(struct string *dest, const struct string src)
+{
+   dest->str = realloc(dest->str, dest->len + src.len + 1);
+   memcpy(dest->str + dest->len, src.str, src.len);
+   dest->str[dest->len + src.len] = '\0';
+   dest->len = dest->len + src.len;
+}
+#define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)})
+
+#define error(str)   "\tERROR: " str "\n"
+#define ERROR_INDENT "\t       "
+
+#define ERROR(msg) ERROR_IF(true, msg)
+#define ERROR_IF(cond, msg)          \
+   do {                              \
+      if (cond) {                    \
+         CAT(error_msg, error(msg)); \
+      }                              \
+   } while(0)
+
+#define CHECK(func, args...)                             \
+   do {                                                  \
+      struct string __msg = func(devinfo, inst, ##args); \
+      if (__msg.str) {                                   \
+         cat(&error_msg, __msg);                         \
+         free(__msg.str);                                \
+      }                                                  \
+   } while (0)
+
+static bool
+inst_is_send(const struct gen_device_info *devinfo, const brw_inst *inst)
+{
+   switch (brw_inst_opcode(devinfo, inst)) {
+   case BRW_OPCODE_SEND:
+   case BRW_OPCODE_SENDC:
+   case BRW_OPCODE_SENDS:
+   case BRW_OPCODE_SENDSC:
+      return true;
+   default:
+      return false;
+   }
+}
+
+static unsigned
+signed_type(unsigned type)
+{
+   switch (type) {
+   case BRW_HW_REG_TYPE_UD:         return BRW_HW_REG_TYPE_D;
+   case BRW_HW_REG_TYPE_UW:         return BRW_HW_REG_TYPE_W;
+   case BRW_HW_REG_NON_IMM_TYPE_UB: return BRW_HW_REG_NON_IMM_TYPE_B;
+   case GEN8_HW_REG_TYPE_UQ:        return GEN8_HW_REG_TYPE_Q;
+   default:                         return type;
+   }
+}
+
+static bool
+inst_is_raw_move(const struct gen_device_info *devinfo, const brw_inst *inst)
+{
+   unsigned dst_type = signed_type(brw_inst_dst_reg_type(devinfo, inst));
+   unsigned src_type = signed_type(brw_inst_src0_reg_type(devinfo, inst));
+
+   if (brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
+       (brw_inst_src0_negate(devinfo, inst) ||
+        brw_inst_src0_abs(devinfo, inst)))
+      return false;
+
+   return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MOV &&
+          brw_inst_saturate(devinfo, inst) == 0 &&
+          dst_type == src_type;
+}
+
+static bool
+dst_is_null(const struct gen_device_info *devinfo, const brw_inst *inst)
+{
+   return brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
+          brw_inst_dst_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
+}
+
+static bool
+src0_is_null(const struct gen_device_info *devinfo, const brw_inst *inst)
+{
+   return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
+          brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
+}
+
+static bool
+src1_is_null(const struct gen_device_info *devinfo, const brw_inst *inst)
+{
+   return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
+          brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
+}
+
+static bool
+src0_is_grf(const struct gen_device_info *devinfo, const brw_inst *inst)
+{
+   return brw_inst_src0_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE;
+}
+
+static bool
+src0_has_scalar_region(const struct gen_device_info *devinfo, const brw_inst *inst)
+{
+   return brw_inst_src0_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
+          brw_inst_src0_width(devinfo, inst) == BRW_WIDTH_1 &&
+          brw_inst_src0_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
+}
+
+static bool
+src1_has_scalar_region(const struct gen_device_info *devinfo, const brw_inst *inst)
+{
+   return brw_inst_src1_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
+          brw_inst_src1_width(devinfo, inst) == BRW_WIDTH_1 &&
+          brw_inst_src1_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
+}
+
+static unsigned
+num_sources_from_inst(const struct gen_device_info *devinfo,
+                      const brw_inst *inst)
+{
+   const struct opcode_desc *desc =
+      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
+   unsigned math_function;
+
+   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {
+      math_function = brw_inst_math_function(devinfo, inst);
+   } else if (devinfo->gen < 6 &&
+              brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) {
+      if (brw_inst_sfid(devinfo, inst) == BRW_SFID_MATH) {
+         /* src1 must be a descriptor (including the information to determine
+          * that the SEND is doing an extended math operation), but src0 can
+          * actually be null since it serves as the source of the implicit GRF
+          * to MRF move.
+          *
+          * If we stop using that functionality, we'll have to revisit this.
+          */
+         return 2;
+      } else {
+         /* Send instructions are allowed to have null sources since they use
+          * the base_mrf field to specify the message register source.
+          */
+         return 0;
+      }
+   } else {
+      assert(desc->nsrc < 4);
+      return desc->nsrc;
+   }
+
+   switch (math_function) {
+   case BRW_MATH_FUNCTION_INV:
+   case BRW_MATH_FUNCTION_LOG:
+   case BRW_MATH_FUNCTION_EXP:
+   case BRW_MATH_FUNCTION_SQRT:
+   case BRW_MATH_FUNCTION_RSQ:
+   case BRW_MATH_FUNCTION_SIN:
+   case BRW_MATH_FUNCTION_COS:
+   case BRW_MATH_FUNCTION_SINCOS:
+   case GEN8_MATH_FUNCTION_INVM:
+   case GEN8_MATH_FUNCTION_RSQRTM:
+      return 1;
+   case BRW_MATH_FUNCTION_FDIV:
+   case BRW_MATH_FUNCTION_POW:
+   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
+   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
+   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
+      return 2;
+   default:
+      unreachable("not reached");
+   }
+}
+
+static struct string
+sources_not_null(const struct gen_device_info *devinfo,
+                 const brw_inst *inst)
+{
+   unsigned num_sources = num_sources_from_inst(devinfo, inst);
+   struct string error_msg = { .str = NULL, .len = 0 };
+
+   /* Nothing to test. 3-src instructions can only have GRF sources, and
+    * there's no bit to control the file.
+    */
+   if (num_sources == 3)
+      return (struct string){};
+
+   if (num_sources >= 1)
+      ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");
+
+   if (num_sources == 2)
+      ERROR_IF(src1_is_null(devinfo, inst), "src1 is null");
+
+   return error_msg;
+}
+
+static struct string
+send_restrictions(const struct gen_device_info *devinfo,
+                  const brw_inst *inst)
+{
+   struct string error_msg = { .str = NULL, .len = 0 };
+
+   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND) {
+      ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,
+               "send must use direct addressing");
+
+      if (devinfo->gen >= 7) {
+         ERROR_IF(!src0_is_grf(devinfo, inst), "send from non-GRF");
+         ERROR_IF(brw_inst_eot(devinfo, inst) &&
+                  brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
+                  "send with EOT must use g112-g127");
+      }
+   }
+
+   return error_msg;
+}
+
+static bool
+is_unsupported_inst(const struct gen_device_info *devinfo,
+                    const brw_inst *inst)
+{
+   return brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst)) == NULL;
+}
+
+static unsigned
+execution_type_for_type(unsigned type, bool is_immediate)
+{
+   /* The meaning of the type bits is dependent on whether the operand is an
+    * immediate, so normalize them first.
+    */
+   if (is_immediate) {
+      switch (type) {
+      case BRW_HW_REG_IMM_TYPE_UV:
+      case BRW_HW_REG_IMM_TYPE_V:
+         type = BRW_HW_REG_TYPE_W;
+         break;
+      case BRW_HW_REG_IMM_TYPE_VF:
+         type = BRW_HW_REG_TYPE_F;
+         break;
+      case GEN8_HW_REG_IMM_TYPE_DF:
+         type = GEN7_HW_REG_NON_IMM_TYPE_DF;
+         break;
+      case GEN8_HW_REG_IMM_TYPE_HF:
+         type = GEN8_HW_REG_NON_IMM_TYPE_HF;
+         break;
+      default:
+         break;
+      }
+   }
+
+   switch (type) {
+   case BRW_HW_REG_TYPE_UD:
+   case BRW_HW_REG_TYPE_D:
+      return BRW_HW_REG_TYPE_D;
+   case BRW_HW_REG_TYPE_UW:
+   case BRW_HW_REG_TYPE_W:
+   case BRW_HW_REG_NON_IMM_TYPE_UB:
+   case BRW_HW_REG_NON_IMM_TYPE_B:
+      return BRW_HW_REG_TYPE_W;
+   case GEN8_HW_REG_TYPE_UQ:
+   case GEN8_HW_REG_TYPE_Q:
+      return GEN8_HW_REG_TYPE_Q;
+   case BRW_HW_REG_TYPE_F:
+   case GEN7_HW_REG_NON_IMM_TYPE_DF:
+   case GEN8_HW_REG_NON_IMM_TYPE_HF:
+      return type;
+   default:
+      unreachable("not reached");
+   }
+}
+
+/**
+ * Returns the execution type of an instruction \p inst
+ */
+static unsigned
+execution_type(const struct gen_device_info *devinfo, const brw_inst *inst)
+{
+   unsigned num_sources = num_sources_from_inst(devinfo, inst);
+   unsigned src0_exec_type, src1_exec_type;
+   unsigned src0_type = brw_inst_src0_reg_type(devinfo, inst);
+   unsigned src1_type = brw_inst_src1_reg_type(devinfo, inst);
+
+   bool src0_is_immediate =
+      brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE;
+   bool src1_is_immediate =
+      brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE;
+
+   /* Execution data type is independent of destination data type, except in
+    * mixed F/HF instructions on CHV and SKL+.
+    */
+   unsigned dst_exec_type = brw_inst_dst_reg_type(devinfo, inst);
+
+   src0_exec_type = execution_type_for_type(src0_type, src0_is_immediate);
+   if (num_sources == 1) {
+      if ((devinfo->gen >= 9 || devinfo->is_cherryview) &&
+          src0_exec_type == GEN8_HW_REG_NON_IMM_TYPE_HF) {
+         return dst_exec_type;
+      }
+      return src0_exec_type;
+   }
+
+   src1_exec_type = execution_type_for_type(src1_type, src1_is_immediate);
+   if (src0_exec_type == src1_exec_type)
+      return src0_exec_type;
+
+   /* Mixed operand types where one is float is float on Gen < 6
+    * (and not allowed on later platforms)
+    */
+   if (devinfo->gen < 6 &&
+       (src0_exec_type == BRW_HW_REG_TYPE_F ||
+        src1_exec_type == BRW_HW_REG_TYPE_F))
+      return BRW_HW_REG_TYPE_F;
+
+   if (src0_exec_type == GEN8_HW_REG_TYPE_Q ||
+       src1_exec_type == GEN8_HW_REG_TYPE_Q)
+      return GEN8_HW_REG_TYPE_Q;
+
+   if (src0_exec_type == BRW_HW_REG_TYPE_D ||
+       src1_exec_type == BRW_HW_REG_TYPE_D)
+      return BRW_HW_REG_TYPE_D;
+
+   if (src0_exec_type == BRW_HW_REG_TYPE_W ||
+       src1_exec_type == BRW_HW_REG_TYPE_W)
+      return BRW_HW_REG_TYPE_W;
+
+   if (src0_exec_type == GEN7_HW_REG_NON_IMM_TYPE_DF ||
+       src1_exec_type == GEN7_HW_REG_NON_IMM_TYPE_DF)
+      return GEN7_HW_REG_NON_IMM_TYPE_DF;
+
+   if (devinfo->gen >= 9 || devinfo->is_cherryview) {
+      if (dst_exec_type == BRW_HW_REG_TYPE_F ||
+          src0_exec_type == BRW_HW_REG_TYPE_F ||
+          src1_exec_type == BRW_HW_REG_TYPE_F) {
+         return BRW_HW_REG_TYPE_F;
+      } else {
+         return GEN8_HW_REG_NON_IMM_TYPE_HF;
+      }
+   }
+
+   assert(src0_exec_type == BRW_HW_REG_TYPE_F);
+   return BRW_HW_REG_TYPE_F;
+}
+
+/**
+ * Returns whether a region is packed
+ *
+ * A region is packed if its elements are adjacent in memory, with no
+ * intervening space, no overlap, and no replicated values.
+ */
+static bool
+is_packed(unsigned vstride, unsigned width, unsigned hstride)
+{
+   if (vstride == width) {
+      if (vstride == 1) {
+         return hstride == 0;
+      } else {
+         return hstride == 1;
+      }
+   }
+
+   return false;
+}
+
+/**
+ * Checks restrictions listed in "General Restrictions Based on Operand Types"
+ * in the "Register Region Restrictions" section.
+ */
+static struct string
+general_restrictions_based_on_operand_types(const struct gen_device_info *devinfo,
+                                            const brw_inst *inst)
+{
+   const struct opcode_desc *desc =
+      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
+   unsigned num_sources = num_sources_from_inst(devinfo, inst);
+   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
+   struct string error_msg = { .str = NULL, .len = 0 };
+
+   if (num_sources == 3)
+      return (struct string){};
+
+   if (inst_is_send(devinfo, inst))
+      return (struct string){};
+
+   if (exec_size == 1)
+      return (struct string){};
+
+   if (desc->ndst == 0)
+      return (struct string){};
+
+   /* The PRMs say:
+    *
+    *    Where n is the largest element size in bytes for any source or
+    *    destination operand type, ExecSize * n must be <= 64.
+    *
+    * But we do not attempt to enforce it, because it is implied by other
+    * rules:
+    *
+    *    - that the destination stride must match the execution data type
+    *    - sources may not span more than two adjacent GRF registers
+    *    - destination may not span more than two adjacent GRF registers
+    *
+    * In fact, checking it would weaken testing of the other rules.
+    */
+
+   unsigned dst_stride = 1 << (brw_inst_dst_hstride(devinfo, inst) - 1);
+   bool dst_type_is_byte =
+      brw_inst_dst_reg_type(devinfo, inst) == BRW_HW_REG_NON_IMM_TYPE_B ||
+      brw_inst_dst_reg_type(devinfo, inst) == BRW_HW_REG_NON_IMM_TYPE_UB;
+
+   if (dst_type_is_byte) {
+      if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) {
+         if (!inst_is_raw_move(devinfo, inst)) {
+            ERROR("Only raw MOV supports a packed-byte destination");
+            return error_msg;
+         } else {
+            return (struct string){};
+         }
+      }
+   }
+
+   unsigned exec_type = execution_type(devinfo, inst);
+   unsigned exec_type_size =
+      brw_hw_reg_type_to_size(devinfo, exec_type, BRW_GENERAL_REGISTER_FILE);
+   unsigned dst_type_size = brw_element_size(devinfo, inst, dst);
+
+   if (exec_type_size > dst_type_size) {
+      ERROR_IF(dst_stride * dst_type_size != exec_type_size,
+               "Destination stride must be equal to the ratio of the sizes of "
+               "the execution data type to the destination type");
+
+      unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
+
+      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
+          brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) {
+         /* The i965 PRM says:
+          *
+          *    Implementation Restriction: The relaxed alignment rule for byte
+          *    destination (#10.5) is not supported.
+          */
+         if ((devinfo->gen > 4 || devinfo->is_g4x) && dst_type_is_byte) {
+            ERROR_IF(subreg % exec_type_size != 0 &&
+                     subreg % exec_type_size != 1,
+                     "Destination subreg must be aligned to the size of the "
+                     "execution data type (or to the next lowest byte for byte "
+                     "destinations)");
+         } else {
+            ERROR_IF(subreg % exec_type_size != 0,
+                     "Destination subreg must be aligned to the size of the "
+                     "execution data type");
+         }
+      }
+   }
+
+   return error_msg;
+}
+
+/**
+ * Checks restrictions listed in "General Restrictions on Regioning Parameters"
+ * in the "Register Region Restrictions" section.
+ */
+static struct string
+general_restrictions_on_region_parameters(const struct gen_device_info *devinfo,
+                                          const brw_inst *inst)
+{
+   const struct opcode_desc *desc =
+      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
+   unsigned num_sources = num_sources_from_inst(devinfo, inst);
+   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
+   struct string error_msg = { .str = NULL, .len = 0 };
+
+   if (num_sources == 3)
+      return (struct string){};
+
+   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) {
+      if (desc->ndst != 0 && !dst_is_null(devinfo, inst))
+         ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1,
+                  "Destination Horizontal Stride must be 1");
+
+      if (num_sources >= 1) {
+         if (devinfo->is_haswell || devinfo->gen >= 8) {
+            ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
+                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
+                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
+                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
+                     "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
+         } else {
+            ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
+                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
+                     brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
+                     "In Align16 mode, only VertStride of 0 or 4 is allowed");
+         }
+      }
+
+      if (num_sources == 2) {
+         if (devinfo->is_haswell || devinfo->gen >= 8) {
+            ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
+                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
+                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
+                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
+                     "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
+         } else {
+            ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE &&
+                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
+                     brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
+                     "In Align16 mode, only VertStride of 0 or 4 is allowed");
+         }
+      }
+
+      return error_msg;
+   }
+
+   for (unsigned i = 0; i < num_sources; i++) {
+      unsigned vstride, width, hstride, element_size, subreg;
+
+#define DO_SRC(n)                                                              \
+      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
+          BRW_IMMEDIATE_VALUE)                                                 \
+         continue;                                                             \
+                                                                               \
+      vstride = brw_inst_src ## n ## _vstride(devinfo, inst) ?                 \
+                (1 << (brw_inst_src ## n ## _vstride(devinfo, inst) - 1)) : 0; \
+      width = 1 << brw_inst_src ## n ## _width(devinfo, inst);                 \
+      hstride = brw_inst_src ## n ## _hstride(devinfo, inst) ?                 \
+                (1 << (brw_inst_src ## n ## _hstride(devinfo, inst) - 1)) : 0; \
+      element_size = brw_element_size(devinfo, inst, src ## n);                \
+      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst)
+
+      if (i == 0) {
+         DO_SRC(0);
+      } else if (i == 1) {
+         DO_SRC(1);
+      }
+#undef DO_SRC
+
+      /* ExecSize must be greater than or equal to Width. */
+      ERROR_IF(exec_size < width, "ExecSize must be greater than or equal "
+               "to Width");
+
+      /* If ExecSize = Width and HorzStride ≠ 0,
+       * VertStride must be set to Width * HorzStride.
+       */
+      if (exec_size == width && hstride != 0) {
+         ERROR_IF(vstride != width * hstride,
+                  "If ExecSize = Width and HorzStride ≠ 0, "
+                  "VertStride must be set to Width * HorzStride");
+      }
+
+      /* If Width = 1, HorzStride must be 0 regardless of the values of
+       * ExecSize and VertStride.
+       */
+      if (width == 1) {
+         ERROR_IF(hstride != 0,
+                  "If Width = 1, HorzStride must be 0 regardless "
+                  "of the values of ExecSize and VertStride");
+      }
+
+      /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
+      if (exec_size == 1 && width == 1) {
+         ERROR_IF(vstride != 0 || hstride != 0,
+                  "If ExecSize = Width = 1, both VertStride "
+                  "and HorzStride must be 0");
+      }
+
+      /* If VertStride = HorzStride = 0, Width must be 1 regardless of the
+       * value of ExecSize.
+       */
+      if (vstride == 0 && hstride == 0) {
+         ERROR_IF(width != 1,
+                  "If VertStride = HorzStride = 0, Width must be "
+                  "1 regardless of the value of ExecSize");
+      }
+
+      /* VertStride must be used to cross GRF register boundaries. This rule
+       * implies that elements within a 'Width' cannot cross GRF boundaries.
+       */
+      const uint64_t mask = (1 << element_size) - 1;
+      unsigned rowbase = subreg;
+
+      for (int y = 0; y < exec_size / width; y++) {
+         uint64_t access_mask = 0;
+         unsigned offset = rowbase;
+
+         for (int x = 0; x < width; x++) {
+            access_mask |= mask << offset;
+            offset += hstride * element_size;
+         }
+
+         rowbase += vstride * element_size;
+
+         if ((uint32_t)access_mask != 0 && (access_mask >> 32) != 0) {
+            ERROR("VertStride must be used to cross GRF register boundaries");
+            break;
+         }
+      }
+   }
+
+   /* Dst.HorzStride must not be 0. */
+   if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) {
+      ERROR_IF(brw_inst_dst_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0,
+               "Destination Horizontal Stride must not be 0");
+   }
+
+   return error_msg;
+}
+
+/**
+ * Creates an \p access_mask for an \p exec_size, \p element_size, and a region
+ *
+ * An \p access_mask is a 32-element array of uint64_t, where each uint64_t is
+ * a bitmask of bytes accessed by the region.
+ *
+ * For instance the access mask of the source gX.1<4,2,2>F in an exec_size = 4
+ * instruction would be
+ *
+ *    access_mask[0] = 0x00000000000000F0
+ *    access_mask[1] = 0x000000000000F000
+ *    access_mask[2] = 0x0000000000F00000
+ *    access_mask[3] = 0x00000000F0000000
+ *    access_mask[4-31] = 0
+ *
+ * because the first execution channel accesses bytes 7-4 and the second
+ * execution channel accesses bytes 15-12, etc.
+ */
+static void
+align1_access_mask(uint64_t access_mask[static 32],
+                   unsigned exec_size, unsigned element_size, unsigned subreg,
+                   unsigned vstride, unsigned width, unsigned hstride)
+{
+   const uint64_t mask = (1 << element_size) - 1;
+   unsigned rowbase = subreg;
+   unsigned element = 0;
+
+   for (int y = 0; y < exec_size / width; y++) {
+      unsigned offset = rowbase;
+
+      for (int x = 0; x < width; x++) {
+         access_mask[element++] = mask << offset;
+         offset += hstride * element_size;
+      }
+
+      rowbase += vstride * element_size;
+   }
+
+   assert(element == 0 || element == exec_size);
+}
+
+/**
+ * Returns the number of registers accessed according to the \p access_mask
+ */
+static int
+registers_read(const uint64_t access_mask[static 32])
+{
+   int regs_read = 0;
+
+   for (unsigned i = 0; i < 32; i++) {
+      if (access_mask[i] > 0xFFFFFFFF) {
+         return 2;
+      } else if (access_mask[i]) {
+         regs_read = 1;
+      }
+   }
+
+   return regs_read;
+}
+
+/**
+ * Checks restrictions listed in "Region Alignment Rules" in the "Register
+ * Region Restrictions" section.
+ */
+static struct string
+region_alignment_rules(const struct gen_device_info *devinfo,
+                       const brw_inst *inst)
+{
+   const struct opcode_desc *desc =
+      brw_opcode_desc(devinfo, brw_inst_opcode(devinfo, inst));
+   unsigned num_sources = num_sources_from_inst(devinfo, inst);
+   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
+   uint64_t dst_access_mask[32], src0_access_mask[32], src1_access_mask[32];
+   struct string error_msg = { .str = NULL, .len = 0 };
+
+   if (num_sources == 3)
+      return (struct string){};
+
+   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16)
+      return (struct string){};
+
+   if (inst_is_send(devinfo, inst))
+      return (struct string){};
+
+   memset(dst_access_mask, 0, sizeof(dst_access_mask));
+   memset(src0_access_mask, 0, sizeof(src0_access_mask));
+   memset(src1_access_mask, 0, sizeof(src1_access_mask));
+
+   for (unsigned i = 0; i < num_sources; i++) {
+      unsigned vstride, width, hstride, element_size, subreg;
+
+      /* In Direct Addressing mode, a source cannot span more than 2 adjacent
+       * GRF registers.
+       */
+
+#define DO_SRC(n)                                                              \
+      if (brw_inst_src ## n ## _address_mode(devinfo, inst) !=                 \
+          BRW_ADDRESS_DIRECT)                                                  \
+         continue;                                                             \
+                                                                               \
+      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
+          BRW_IMMEDIATE_VALUE)                                                 \
+         continue;                                                             \
+                                                                               \
+      vstride = brw_inst_src ## n ## _vstride(devinfo, inst) ?                 \
+                (1 << (brw_inst_src ## n ## _vstride(devinfo, inst) - 1)) : 0; \
+      width = 1 << brw_inst_src ## n ## _width(devinfo, inst);                 \
+      hstride = brw_inst_src ## n ## _hstride(devinfo, inst) ?                 \
+                (1 << (brw_inst_src ## n ## _hstride(devinfo, inst) - 1)) : 0; \
+      element_size = brw_element_size(devinfo, inst, src ## n);                \
+      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);             \
+      align1_access_mask(src ## n ## _access_mask,                             \
+                         exec_size, element_size, subreg,                      \
+                         vstride, width, hstride)
+
+      if (i == 0) {
+         DO_SRC(0);
+      } else if (i == 1) {
+         DO_SRC(1);
+      }
+#undef DO_SRC
+
+      unsigned num_vstride = exec_size / width;
+      unsigned num_hstride = width;
+      unsigned vstride_elements = (num_vstride - 1) * vstride;
+      unsigned hstride_elements = (num_hstride - 1) * hstride;
+      unsigned offset = (vstride_elements + hstride_elements) * element_size +
+                        subreg;
+      ERROR_IF(offset >= 64,
+               "A source cannot span more than 2 adjacent GRF registers");
+   }
+
+   if (desc->ndst == 0 || dst_is_null(devinfo, inst))
+      return error_msg;
+
+   unsigned stride = 1 << (brw_inst_dst_hstride(devinfo, inst) - 1);
+   unsigned element_size = brw_element_size(devinfo, inst, dst);
+   unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
+   unsigned offset = ((exec_size - 1) * stride * element_size) + subreg;
+   ERROR_IF(offset >= 64,
+            "A destination cannot span more than 2 adjacent GRF registers");
+
+   if (error_msg.str)
+      return error_msg;
+
+   align1_access_mask(dst_access_mask, exec_size, element_size, subreg,
+                      exec_size == 1 ? 0 : exec_size * stride,
+                      exec_size == 1 ? 1 : exec_size,
+                      exec_size == 1 ? 0 : stride);
+
+   unsigned dst_regs = registers_read(dst_access_mask);
+   unsigned src0_regs = registers_read(src0_access_mask);
+   unsigned src1_regs = registers_read(src1_access_mask);
+
+   /* The SNB, IVB, HSW, BDW, and CHV PRMs say:
+    *
+    *    When an instruction has a source region spanning two registers and a
+    *    destination region contained in one register, the number of elements
+    *    must be the same between two sources and one of the following must be
+    *    true:
+    *
+    *       1. The destination region is entirely contained in the lower OWord
+    *          of a register.
+    *       2. The destination region is entirely contained in the upper OWord
+    *          of a register.
+    *       3. The destination elements are evenly split between the two OWords
+    *          of a register.
+    */
+   if (devinfo->gen <= 8) {
+      if (dst_regs == 1 && (src0_regs == 2 || src1_regs == 2)) {
+         unsigned upper_oword_writes = 0, lower_oword_writes = 0;
+
+         for (unsigned i = 0; i < exec_size; i++) {
+            if (dst_access_mask[i] > 0x0000FFFF) {
+               upper_oword_writes++;
+            } else {
+               assert(dst_access_mask[i] != 0);
+               lower_oword_writes++;
+            }
+         }
+
+         ERROR_IF(lower_oword_writes != 0 &&
+                  upper_oword_writes != 0 &&
+                  upper_oword_writes != lower_oword_writes,
+                  "Writes must be to only one OWord or "
+                  "evenly split between OWords");
+      }
+   }
+
+   /* The IVB and HSW PRMs say:
+    *
+    *    When an instruction has a source region that spans two registers and
+    *    the destination spans two registers, the destination elements must be
+    *    evenly split between the two registers [...]
+    *
+    * The SNB PRM contains similar wording (but written in a much more
+    * confusing manner).
+    *
+    * The BDW PRM says:
+    *
+    *    When destination spans two registers, the source may be one or two
+    *    registers. The destination elements must be evenly split between the
+    *    two registers.
+    *
+    * The SKL PRM says:
+    *
+    *    When destination of MATH instruction spans two registers, the
+    *    destination elements must be evenly split between the two registers.
+    *
+    * It is not known whether this restriction applies to KBL or other Gens
+    * after SKL.
+    */
+   if (devinfo->gen <= 8 ||
+       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_MATH) {
+
+      /* Nothing explicitly states that on Gen < 8 elements must be evenly
+       * split between two destination registers in the two exceptional
+       * source-region-spans-one-register cases, but since Broadwell requires
+       * evenly split writes regardless of source region, we assume that it was
+       * an oversight and require it.
+       */
+      if (dst_regs == 2) {
+         unsigned upper_reg_writes = 0, lower_reg_writes = 0;
+
+         for (unsigned i = 0; i < exec_size; i++) {
+            if (dst_access_mask[i] > 0xFFFFFFFF) {
+               upper_reg_writes++;
+            } else {
+               assert(dst_access_mask[i] != 0);
+               lower_reg_writes++;
+            }
+         }
+
+         ERROR_IF(upper_reg_writes != lower_reg_writes,
+                  "Writes must be evenly split between the two "
+                  "destination registers");
+      }
+   }
+
+   /* The IVB and HSW PRMs say:
+    *
+    *    When an instruction has a source region that spans two registers and
+    *    the destination spans two registers, the destination elements must be
+    *    evenly split between the two registers and each destination register
+    *    must be entirely derived from one source register.
+    *
+    *    Note: In such cases, the regioning parameters must ensure that the
+    *    offset from the two source registers is the same.
+    *
+    * The SNB PRM contains similar wording (but written in a much more
+    * confusing manner).
+    *
+    * There are effectively three rules stated here:
+    *
+    *    For an instruction with a source and a destination spanning two
+    *    registers,
+    *
+    *       (1) destination elements must be evenly split between the two
+    *           registers
+    *       (2) all destination elements in a register must be derived
+    *           from one source register
+    *       (3) the offset (i.e. the starting location in each of the two
+    *           registers spanned by a region) must be the same in the two
+    *           registers spanned by a region
+    *
+    * It is impossible to violate rule (1) without violating (2) or (3), so we
+    * do not attempt to validate it.
+    */
+   if (devinfo->gen <= 7 && dst_regs == 2) {
+      for (unsigned i = 0; i < num_sources; i++) {
+#define DO_SRC(n)                                                             \
+         if (src ## n ## _regs <= 1)                                          \
+            continue;                                                         \
+                                                                              \
+         for (unsigned i = 0; i < exec_size; i++) {                           \
+            if ((dst_access_mask[i] > 0xFFFFFFFF) !=                          \
+                (src ## n ## _access_mask[i] > 0xFFFFFFFF)) {                 \
+               ERROR("Each destination register must be entirely derived "    \
+                     "from one source register");                             \
+               break;                                                         \
+            }                                                                 \
+         }                                                                    \
+                                                                              \
+         unsigned offset_0 =                                                  \
+            brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);               \
+         unsigned offset_1 = offset_0;                                        \
+                                                                              \
+         for (unsigned i = 0; i < exec_size; i++) {                           \
+            if (src ## n ## _access_mask[i] > 0xFFFFFFFF) {                   \
+               offset_1 = __builtin_ctzll(src ## n ## _access_mask[i]) - 32;  \
+               break;                                                         \
+            }                                                                 \
+         }                                                                    \
+                                                                              \
+         ERROR_IF(offset_0 != offset_1,                                       \
+                  "The offset from the two source registers "                 \
+                  "must be the same")
+
+         if (i == 0) {
+            DO_SRC(0);
+         } else if (i == 1) {
+            DO_SRC(1);
+         }
+#undef DO_SRC
+      }
+   }
+
+   /* The IVB and HSW PRMs say:
+    *
+    *    When destination spans two registers, the source MUST span two
+    *    registers. The exception to the above rule:
+    *        1. When source is scalar, the source registers are not
+    *           incremented.
+    *        2. When source is packed integer Word and destination is packed
+    *           integer DWord, the source register is not incremented but the
+    *           source sub register is incremented.
+    *
+    * The SNB PRM does not contain this rule, but the internal documentation
+    * indicates that it applies to SNB as well. We assume that the rule applies
+    * to Gen <= 5 although their PRMs do not state it.
+    *
+    * While the documentation explicitly says in exception (2) that the
+    * destination must be an integer DWord, the hardware allows at least a
+    * float destination type as well. We emit such instructions from
+    *
+    *    fs_visitor::emit_interpolation_setup_gen6
+    *    fs_visitor::emit_fragcoord_interpolation
+    *
+    * and have for years with no ill effects.
+    *
+    * Additionally the simulator source code indicates that the real condition
+    * is that the size of the destination type is 4 bytes.
+    */
+   if (devinfo->gen <= 7 && dst_regs == 2) {
+      bool dst_is_packed_dword =
+         is_packed(exec_size * stride, exec_size, stride) &&
+         brw_element_size(devinfo, inst, dst) == 4;
+
+      for (unsigned i = 0; i < num_sources; i++) {
+#define DO_SRC(n)                                                                  \
+         unsigned vstride, width, hstride;                                         \
+         vstride = brw_inst_src ## n ## _vstride(devinfo, inst) ?                  \
+                   (1 << (brw_inst_src ## n ## _vstride(devinfo, inst) - 1)) : 0;  \
+         width = 1 << brw_inst_src ## n ## _width(devinfo, inst);                  \
+         hstride = brw_inst_src ## n ## _hstride(devinfo, inst) ?                  \
+                   (1 << (brw_inst_src ## n ## _hstride(devinfo, inst) - 1)) : 0;  \
+         bool src ## n ## _is_packed_word =                                        \
+            is_packed(vstride, width, hstride) &&                                  \
+            (brw_inst_src ## n ## _reg_type(devinfo, inst) == BRW_HW_REG_TYPE_W || \
+             brw_inst_src ## n ## _reg_type(devinfo, inst) == BRW_HW_REG_TYPE_UW); \
+                                                                                   \
+         ERROR_IF(src ## n ## _regs == 1 &&                                        \
+                  !src ## n ## _has_scalar_region(devinfo, inst) &&                \
+                  !(dst_is_packed_dword && src ## n ## _is_packed_word),           \
+                  "When the destination spans two registers, the source must "     \
+                  "span two registers\n" ERROR_INDENT "(exceptions for scalar "    \
+                  "source and packed-word to packed-dword expansion)")
+
+         if (i == 0) {
+            DO_SRC(0);
+         } else if (i == 1) {
+            DO_SRC(1);
+         }
+#undef DO_SRC
+      }
+   }
+
+   return error_msg;
+}
+
+bool
+brw_validate_instructions(const struct brw_codegen *p, int start_offset,
+                          struct annotation_info *annotation)
+{
+   const struct gen_device_info *devinfo = p->devinfo;
+   const void *store = p->store;
+   bool valid = true;
+
+   for (int src_offset = start_offset; src_offset < p->next_insn_offset;
+        src_offset += sizeof(brw_inst)) {
+      struct string error_msg = { .str = NULL, .len = 0 };
+      const brw_inst *inst = store + src_offset;
+
+      if (is_unsupported_inst(devinfo, inst)) {
+         ERROR("Instruction not supported on this Gen");
+      } else {
+         CHECK(sources_not_null);
+         CHECK(send_restrictions);
+         CHECK(general_restrictions_based_on_operand_types);
+         CHECK(general_restrictions_on_region_parameters);
+         CHECK(region_alignment_rules);
+      }
+
+      if (error_msg.str && annotation) {
+         annotation_insert_error(annotation, src_offset, error_msg.str);
+      }
+      valid = valid && error_msg.len == 0;
+      free(error_msg.str);
+   }
+
+   return valid;
+}
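For context, a minimal sketch of how a caller might drive this validator. Only the `brw_validate_instructions()` signature and the per-offset error annotations come from the file added above; the wrapper function and its error handling are illustrative assumptions, not part of this commit.

```c
/* Hypothetical caller sketch (not part of this commit): validate everything
 * emitted into a brw_codegen stream, from offset 0 up to p->next_insn_offset.
 * brw_validate_instructions() attaches each failing instruction's error
 * string to the annotations at that instruction's byte offset.
 */
#include <stdio.h>
#include <stdbool.h>
#include "brw_eu.h"

static bool
validate_emitted_eu_code(struct brw_codegen *p,
                         struct annotation_info *annotation)
{
   /* A caller resuming mid-stream would pass the offset where its
    * instructions begin instead of 0.
    */
   if (!brw_validate_instructions(p, 0, annotation)) {
      fprintf(stderr, "EU validation failed\n");
      return false;
   }
   return true;
}
```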