diff options
author | Francisco Jerez <[email protected]> | 2018-12-06 14:11:34 -0800 |
---|---|---|
committer | Francisco Jerez <[email protected]> | 2019-01-09 12:03:08 -0800 |
commit | 812ede088f5f6bea4e6fba991bd59d5cce264212 (patch) | |
tree | dc733a715ae81c8a0bf60c7da6ce4626f2f85573 /src/intel/compiler/brw_fs.cpp | |
parent | c5f9c0009d5161e059e54a76fbdb910a6c151f9f (diff) |
intel/fs: Implement quad swizzles on ICL+.
Align16 is no longer available on ICL+, so a new implementation is provided
using Align1 instead. Not all possible swizzles can be represented as
a single Align1 region, but some fast paths are provided for
frequently used swizzles that can be represented efficiently in Align1
mode.
Fixes ~90 subgroup quad swap Vulkan CTS tests.
Cc: [email protected]
Reviewed-by: Iago Toral Quiroga <[email protected]>
Diffstat (limited to 'src/intel/compiler/brw_fs.cpp')
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 29 |
1 file changed, 26 insertions, 3 deletions
```diff
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 2f0f0151219..e790a9c8d63 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -315,6 +315,24 @@ fs_inst::has_source_and_destination_hazard() const
        * may stomp all over it.
        */
       return true;
+   case SHADER_OPCODE_QUAD_SWIZZLE:
+      switch (src[1].ud) {
+      case BRW_SWIZZLE_XXXX:
+      case BRW_SWIZZLE_YYYY:
+      case BRW_SWIZZLE_ZZZZ:
+      case BRW_SWIZZLE_WWWW:
+      case BRW_SWIZZLE_XXZZ:
+      case BRW_SWIZZLE_YYWW:
+      case BRW_SWIZZLE_XYXY:
+      case BRW_SWIZZLE_ZWZW:
+         /* These can be implemented as a single Align1 region on all
+          * platforms, so there's never a hazard between source and
+          * destination. C.f. fs_generator::generate_quad_swizzle().
+          */
+         return false;
+      default:
+         return !is_uniform(src[0]);
+      }
    default:
       /* The SIMD16 compressed instruction
        *
@@ -5579,9 +5597,14 @@ get_lowered_simd_width(const struct gen_device_info *devinfo,
    case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
       return MIN2(8, inst->exec_size);
 
-   case SHADER_OPCODE_QUAD_SWIZZLE:
-      return 8;
-
+   case SHADER_OPCODE_QUAD_SWIZZLE: {
+      const unsigned swiz = inst->src[1].ud;
+      return (is_uniform(inst->src[0]) ?
+                 get_fpu_lowered_simd_width(devinfo, inst) :
+              devinfo->gen < 11 && type_sz(inst->src[0].type) == 4 ? 8 :
+              swiz == BRW_SWIZZLE_XYXY || swiz == BRW_SWIZZLE_ZWZW ? 4 :
+              get_fpu_lowered_simd_width(devinfo, inst));
+   }
    case SHADER_OPCODE_MOV_INDIRECT: {
       /* From IVB and HSW PRMs:
        *
```