From ca357be5aa2fbedc115c1e411db87d663e37ebb5 Mon Sep 17 00:00:00 2001
From: Samuel Iglesias Gonsálvez
Date: Wed, 26 Apr 2017 10:12:03 +0200
Subject: i965/vec4: fix vertical stride to avoid breaking region parameter rule
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From IVB PRM, vol4, part3, "General Restrictions on Regioning
Parameters":

  "If ExecSize = Width and HorzStride ≠ 0, VertStride must be set
   to Width * HorzStride."

In next patch, we are going to modify the region parameter for
uniforms and vgrf. For uniforms that are the source of
DF align1 instructions, they will have <0, 4, 1> regioning and
the execsize for those instructions will be 4, so they will break
the regioning rule. This will be the same for VGRF sources where
we use the vstride == 0 exploit.

As we know we are not going to cross the GRF boundary with that
execsize and parameters (not even with the exploit), we just fix
the vstride here.

v2:
- Move is_align1_df() (Curro)
- Refactor exec_size == width calculation (Curro)

Signed-off-by: Samuel Iglesias Gonsálvez
Cc: "17.1"
Reviewed-by: Francisco Jerez
(cherry picked from commit 7f728bce811fc283e672e3a07b008bb7b52de35e)
---
 src/intel/compiler/brw_vec4.cpp | 50 ++++++++++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 18 deletions(-)

(limited to 'src/intel')

diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 0909ddb5861..e27be8fc254 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -1948,6 +1948,24 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
    inst->mlen = 2;
 }
 
+static bool
+is_align1_df(vec4_instruction *inst)
+{
+   switch (inst->opcode) {
+   case VEC4_OPCODE_DOUBLE_TO_F32:
+   case VEC4_OPCODE_DOUBLE_TO_D32:
+   case VEC4_OPCODE_DOUBLE_TO_U32:
+   case VEC4_OPCODE_TO_DOUBLE:
+   case VEC4_OPCODE_PICK_LOW_32BIT:
+   case VEC4_OPCODE_PICK_HIGH_32BIT:
+   case VEC4_OPCODE_SET_LOW_32BIT:
+   case VEC4_OPCODE_SET_HIGH_32BIT:
+      return true;
+   default:
+      return false;
+   }
+}
+
 void
 vec4_visitor::convert_to_hw_regs()
 {
@@ -2005,6 +2023,20 @@ vec4_visitor::convert_to_hw_regs()
 
          apply_logical_swizzle(&reg, inst, i);
          src = reg;
+
+         /* From IVB PRM, vol4, part3, "General Restrictions on Regioning
+          * Parameters":
+          *
+          *   "If ExecSize = Width and HorzStride ≠ 0, VertStride must be set
+          *    to Width * HorzStride."
+          *
+          * We can break this rule with DF sources on DF align1
+          * instructions, because the exec_size would be 4 and width is 4.
+          * As we know we are not accessing to next GRF, it is safe to
+          * set vstride to the formula given by the rule itself.
+          */
+         if (is_align1_df(inst) && (cvt(inst->exec_size) - 1) == src.width)
+            src.vstride = src.width + src.hstride;
       }
 
       if (inst->is_3src(devinfo)) {
@@ -2262,24 +2294,6 @@ vec4_visitor::lower_simd_width()
    return progress;
 }
 
-static bool
-is_align1_df(vec4_instruction *inst)
-{
-   switch (inst->opcode) {
-   case VEC4_OPCODE_DOUBLE_TO_F32:
-   case VEC4_OPCODE_DOUBLE_TO_D32:
-   case VEC4_OPCODE_DOUBLE_TO_U32:
-   case VEC4_OPCODE_TO_DOUBLE:
-   case VEC4_OPCODE_PICK_LOW_32BIT:
-   case VEC4_OPCODE_PICK_HIGH_32BIT:
-   case VEC4_OPCODE_SET_LOW_32BIT:
-   case VEC4_OPCODE_SET_HIGH_32BIT:
-      return true;
-   default:
-      return false;
-   }
-}
-
 static brw_predicate
 scalarize_predicate(brw_predicate predicate, unsigned writemask)
 {
--
cgit v1.2.3