summary | refs | log | tree | commit | diff | stats
path: root/src/intel
diff options
context:
space:
mode:
author: Samuel Iglesias Gonsálvez <[email protected]> 2017-04-26 10:12:03 +0200
committer: Emil Velikov <[email protected]> 2017-05-05 19:35:08 +0100
commit: ca357be5aa2fbedc115c1e411db87d663e37ebb5 (patch)
tree: 7c7159c1167c4beb5b148f7616f63813547b5024 /src/intel
parent: e702379663c10251382b4fd38ab329a5f0b7be8b (diff)
i965/vec4: fix vertical stride to avoid breaking region parameter rule
From IVB PRM, vol4, part3, "General Restrictions on Regioning Parameters":

  "If ExecSize = Width and HorzStride ≠ 0, VertStride must be set to
   Width * HorzStride."

In the next patch, we are going to modify the region parameter for uniforms and vgrf. For uniforms that are the source of DF align1 instructions, they will have <0, 4, 1> regioning and the execsize for those instructions will be 4, so they will break the regioning rule. This will be the same for VGRF sources where we use the vstride == 0 exploit.

As we know we are not going to cross the GRF boundary with that execsize and parameters (not even with the exploit), we just fix the vstride here.

v2:
- Move is_align1_df() (Curro)
- Refactor exec_size == width calculation (Curro)

Signed-off-by: Samuel Iglesias Gonsálvez <[email protected]>
Cc: "17.1" <[email protected]>
Reviewed-by: Francisco Jerez <[email protected]>
(cherry picked from commit 7f728bce811fc283e672e3a07b008bb7b52de35e)
Diffstat (limited to 'src/intel')
-rw-r--r-- src/intel/compiler/brw_vec4.cpp | 50
1 files changed, 32 insertions, 18 deletions
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 0909ddb5861..e27be8fc254 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -1948,6 +1948,24 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
inst->mlen = 2;
}
+static bool
+is_align1_df(vec4_instruction *inst)
+{
+ switch (inst->opcode) {
+ case VEC4_OPCODE_DOUBLE_TO_F32:
+ case VEC4_OPCODE_DOUBLE_TO_D32:
+ case VEC4_OPCODE_DOUBLE_TO_U32:
+ case VEC4_OPCODE_TO_DOUBLE:
+ case VEC4_OPCODE_PICK_LOW_32BIT:
+ case VEC4_OPCODE_PICK_HIGH_32BIT:
+ case VEC4_OPCODE_SET_LOW_32BIT:
+ case VEC4_OPCODE_SET_HIGH_32BIT:
+ return true;
+ default:
+ return false;
+ }
+}
+
void
vec4_visitor::convert_to_hw_regs()
{
@@ -2005,6 +2023,20 @@ vec4_visitor::convert_to_hw_regs()
apply_logical_swizzle(&reg, inst, i);
src = reg;
+
+ /* From IVB PRM, vol4, part3, "General Restrictions on Regioning
+ * Parameters":
+ *
+ * "If ExecSize = Width and HorzStride ≠ 0, VertStride must be set
+ * to Width * HorzStride."
+ *
+ * We can break this rule with DF sources on DF align1
+ * instructions, because the exec_size would be 4 and width is 4.
+ * As we know we are not accessing to next GRF, it is safe to
+ * set vstride to the formula given by the rule itself.
+ */
+ if (is_align1_df(inst) && (cvt(inst->exec_size) - 1) == src.width)
+ src.vstride = src.width + src.hstride;
}
if (inst->is_3src(devinfo)) {
@@ -2262,24 +2294,6 @@ vec4_visitor::lower_simd_width()
return progress;
}
-static bool
-is_align1_df(vec4_instruction *inst)
-{
- switch (inst->opcode) {
- case VEC4_OPCODE_DOUBLE_TO_F32:
- case VEC4_OPCODE_DOUBLE_TO_D32:
- case VEC4_OPCODE_DOUBLE_TO_U32:
- case VEC4_OPCODE_TO_DOUBLE:
- case VEC4_OPCODE_PICK_LOW_32BIT:
- case VEC4_OPCODE_PICK_HIGH_32BIT:
- case VEC4_OPCODE_SET_LOW_32BIT:
- case VEC4_OPCODE_SET_HIGH_32BIT:
- return true;
- default:
- return false;
- }
-}
-
static brw_predicate
scalarize_predicate(brw_predicate predicate, unsigned writemask)
{