aboutsummaryrefslogtreecommitdiffstats
path: root/src/intel/compiler/brw_fs.cpp
diff options
context:
space:
mode:
authorFrancisco Jerez <[email protected]>2019-01-16 18:30:08 -0800
committerFrancisco Jerez <[email protected]>2019-02-21 14:07:25 -0800
commitc3c27762f787a93ee3f27189bef8d7cdcb3a6cab (patch)
treeaa51a2a70d64104ff31b4746d79e5cc5aa37b4e6 /src/intel/compiler/brw_fs.cpp
parentd9e08e753bf4125edcdb561bbe80547999eb8e32 (diff)
intel/fs: Exclude control sources from execution type and region alignment calculations.
Currently the execution type calculation will return a bogus value in cases like: mov_indirect(8) vgrf0:w, vgrf1:w, vgrf2:ud, 32u Which will be considered to have a 32-bit integer execution type even though the actual indirect move operation will be carried out with 16-bit precision. Similarly there's no need to apply the CHV/BXT double-precision region alignment restrictions to such control sources, since they aren't directly involved in the double-precision arithmetic operations emitted by these virtual instructions. Applying the CHV/BXT restrictions to control sources was expected to be harmless if mildly inefficient, but unfortunately it exposed problems at codegen level for virtual instructions (namely the SHUFFLE instruction used for the Vulkan 1.1 subgroup feature) that weren't prepared to accept control sources with an arbitrary strided region. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109328 Reported-by: Mark Janes <[email protected]> Fixes: efa4e4bc5fc "intel/fs: Introduce regioning lowering pass." Tested-by: Anuj Phogat <[email protected]> Reviewed-by: Jason Ekstrand <[email protected]>
Diffstat (limited to 'src/intel/compiler/brw_fs.cpp')
-rw-r--r--src/intel/compiler/brw_fs.cpp56
1 files changed, 56 insertions, 0 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 5a18ba86a96..4a8f8ea5740 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -251,6 +251,62 @@ fs_inst::is_send_from_grf() const
}
}
+bool
+fs_inst::is_control_source(unsigned arg) const
+{
+ switch (opcode) {
+ case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
+ case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
+ case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
+ return arg == 0;
+
+ case SHADER_OPCODE_BROADCAST:
+ case SHADER_OPCODE_SHUFFLE:
+ case SHADER_OPCODE_QUAD_SWIZZLE:
+ case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
+ case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
+ case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
+ case SHADER_OPCODE_IMAGE_SIZE:
+ case SHADER_OPCODE_GET_BUFFER_SIZE:
+ return arg == 1;
+
+ case SHADER_OPCODE_MOV_INDIRECT:
+ case SHADER_OPCODE_CLUSTER_BROADCAST:
+ case SHADER_OPCODE_TEX:
+ case FS_OPCODE_TXB:
+ case SHADER_OPCODE_TXD:
+ case SHADER_OPCODE_TXF:
+ case SHADER_OPCODE_TXF_LZ:
+ case SHADER_OPCODE_TXF_CMS:
+ case SHADER_OPCODE_TXF_CMS_W:
+ case SHADER_OPCODE_TXF_UMS:
+ case SHADER_OPCODE_TXF_MCS:
+ case SHADER_OPCODE_TXL:
+ case SHADER_OPCODE_TXL_LZ:
+ case SHADER_OPCODE_TXS:
+ case SHADER_OPCODE_LOD:
+ case SHADER_OPCODE_TG4:
+ case SHADER_OPCODE_TG4_OFFSET:
+ case SHADER_OPCODE_SAMPLEINFO:
+ case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+ case SHADER_OPCODE_BYTE_SCATTERED_READ:
+ case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
+ case SHADER_OPCODE_TYPED_ATOMIC:
+ case SHADER_OPCODE_TYPED_SURFACE_READ:
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE:
+ return arg == 1 || arg == 2;
+
+ case SHADER_OPCODE_SEND:
+ return arg == 0 || arg == 1;
+
+ default:
+ return false;
+ }
+}
+
/**
* Returns true if this instruction's sources and destinations cannot
* safely be the same register.