summaryrefslogtreecommitdiffstats
path: root/src/intel/compiler/brw_fs.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/intel/compiler/brw_fs.cpp')
-rw-r--r--src/intel/compiler/brw_fs.cpp29
1 files changed, 26 insertions, 3 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 2f0f0151219..e790a9c8d63 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -315,6 +315,24 @@ fs_inst::has_source_and_destination_hazard() const
* may stomp all over it.
*/
return true;
+ case SHADER_OPCODE_QUAD_SWIZZLE:
+ switch (src[1].ud) {
+ case BRW_SWIZZLE_XXXX:
+ case BRW_SWIZZLE_YYYY:
+ case BRW_SWIZZLE_ZZZZ:
+ case BRW_SWIZZLE_WWWW:
+ case BRW_SWIZZLE_XXZZ:
+ case BRW_SWIZZLE_YYWW:
+ case BRW_SWIZZLE_XYXY:
+ case BRW_SWIZZLE_ZWZW:
+ /* These can be implemented as a single Align1 region on all
+ * platforms, so there's never a hazard between source and
+ * destination. C.f. fs_generator::generate_quad_swizzle().
+ */
+ return false;
+ default:
+ return !is_uniform(src[0]);
+ }
default:
/* The SIMD16 compressed instruction
*
@@ -5579,9 +5597,14 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
return MIN2(8, inst->exec_size);
- case SHADER_OPCODE_QUAD_SWIZZLE:
- return 8;
-
+ case SHADER_OPCODE_QUAD_SWIZZLE: {
+ const unsigned swiz = inst->src[1].ud;
+ return (is_uniform(inst->src[0]) ?
+ get_fpu_lowered_simd_width(devinfo, inst) :
+ devinfo->gen < 11 && type_sz(inst->src[0].type) == 4 ? 8 :
+ swiz == BRW_SWIZZLE_XYXY || swiz == BRW_SWIZZLE_ZWZW ? 4 :
+ get_fpu_lowered_simd_width(devinfo, inst));
+ }
case SHADER_OPCODE_MOV_INDIRECT: {
/* From IVB and HSW PRMs:
*