diff options
author | Francisco Jerez <currojerez@riseup.net> | 2016-05-20 13:34:46 -0700 |
---|---|---|
committer | Francisco Jerez <currojerez@riseup.net> | 2016-05-27 23:19:22 -0700 |
commit | cf5443f984da4eb500c9b1ad9b9f53bc8747fef3 (patch) | |
tree | 152d7db089e934af4fd770a9d884774a2094868a | |
parent | 197833caa3d684c092ee76d1e9ff3fac28576b04 (diff) |
i965/fs: Limit SIMD width of various virtual opcodes to the maximum supported value.
That maximum is 16 or 8 in most cases. This makes sure that 32-wide
virtual instructions get chopped up into chunks of their maximum
execution size.
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 45 |
1 files changed, 40 insertions, 5 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 32abe3843a3..03db084aa8b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -4816,6 +4816,22 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
       /* Integer division is limited to SIMD8 on all generations. */
       return MIN2(8, inst->exec_size);

+   case FS_OPCODE_LINTERP:
+   case FS_OPCODE_GET_BUFFER_SIZE:
+   case FS_OPCODE_DDX_COARSE:
+   case FS_OPCODE_DDX_FINE:
+   case FS_OPCODE_DDY_COARSE:
+   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
+   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
+   case FS_OPCODE_PACK_HALF_2x16_SPLIT:
+   case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
+   case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
+   case FS_OPCODE_INTERPOLATE_AT_CENTROID:
+   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
+   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
+   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
+      return MIN2(16, inst->exec_size);
+
    case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
       /* Pre-ILK hardware doesn't have a SIMD8 variant of the texel fetch
        * message used to implement varying pull constant loads, so expand it
@@ -4870,8 +4886,14 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
              inst->exec_size == 8);
       /* Dual-source FB writes are unsupported in SIMD16 mode. */
       return (inst->src[FB_WRITE_LOGICAL_SRC_COLOR1].file != BAD_FILE ?
-              8 : inst->exec_size);
+              8 : MIN2(16, inst->exec_size));

+   case SHADER_OPCODE_TEX_LOGICAL:
+   case SHADER_OPCODE_TXF_CMS_LOGICAL:
+   case SHADER_OPCODE_TXF_UMS_LOGICAL:
+   case SHADER_OPCODE_TXF_MCS_LOGICAL:
+   case SHADER_OPCODE_LOD_LOGICAL:
+   case SHADER_OPCODE_TG4_LOGICAL:
    case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
       return MIN2(16, inst->exec_size);

@@ -4882,7 +4904,7 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
    case SHADER_OPCODE_TG4_OFFSET_LOGICAL: {
       /* gather4_po_c is unsupported in SIMD16 mode. */
       const fs_reg &shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C];
-      return (shadow_c.file != BAD_FILE ? 8 : inst->exec_size);
+      return (shadow_c.file != BAD_FILE ? 8 : MIN2(16, inst->exec_size));
    }
    case SHADER_OPCODE_TXL_LOGICAL:
    case FS_OPCODE_TXB_LOGICAL: {
@@ -4896,7 +4918,7 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
       else if (devinfo->gen < 7 && shadow_c.file != BAD_FILE)
          return 8;
       else
-         return inst->exec_size;
+         return MIN2(16, inst->exec_size);
    }
    case SHADER_OPCODE_TXF_LOGICAL:
    case SHADER_OPCODE_TXS_LOGICAL:
@@ -4906,7 +4928,7 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
       if (devinfo->gen == 4)
          return 16;
       else
-         return inst->exec_size;
+         return MIN2(16, inst->exec_size);

    case SHADER_OPCODE_TXF_CMS_W_LOGICAL: {
       /* This opcode can take up to 6 arguments which means that in some
@@ -4921,7 +4943,7 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
       if ((coord_components + 3) * 2 > MAX_SAMPLER_MESSAGE_SIZE)
          return 8;
       else
-         return inst->exec_size;
+         return MIN2(16, inst->exec_size);
    }

    case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
@@ -4929,6 +4951,19 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
    case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
       return 8;

+   case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+   case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
+   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
+      return MIN2(16, inst->exec_size);
+
+   case SHADER_OPCODE_URB_READ_SIMD8:
+   case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
+   case SHADER_OPCODE_URB_WRITE_SIMD8:
+   case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
+   case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
+   case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
+      return MIN2(8, inst->exec_size);
+
    case SHADER_OPCODE_MOV_INDIRECT:
       /* Prior to Broadwell, we only have 8 address subregisters */
       return MIN3(devinfo->gen >= 8 ? 16 : 8,