diff options
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 56 |
1 files changed, 55 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 426ea57d8f9..62ae9abede7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -275,6 +275,59 @@ is_logic_op(enum opcode opcode) opcode == BRW_OPCODE_NOT); } +static bool +can_take_stride(fs_inst *inst, unsigned arg, unsigned stride, + const brw_device_info *devinfo) +{ + if (stride > 4) + return false; + + /* 3-source instructions can only be Align16, which restricts what strides + * they can take. They can only take a stride of 1 (the usual case), or 0 + * with a special "repctrl" bit. But the repctrl bit doesn't work for + * 64-bit datatypes, so if the source type is 64-bit then only a stride of + * 1 is allowed. From the Broadwell PRM, Volume 7 "3D Media GPGPU", page + * 944: + * + * This is applicable to 32b datatypes and 16b datatype. 64b datatypes + * cannot use the replicate control. + */ + if (inst->is_3src()) { + if (type_sz(inst->src[arg].type) > 4) + return stride == 1; + else + return stride == 1 || stride == 0; + } + + /* From the Broadwell PRM, Volume 2a "Command Reference - Instructions", + * page 391 ("Extended Math Function"): + * + * The following restrictions apply for align1 mode: Scalar source is + * supported. Source and destination horizontal stride must be the + * same. + * + * From the Haswell PRM Volume 2b "Command Reference - Instructions", page + * 134 ("Extended Math Function"): + * + * Scalar source is supported. Source and destination horizontal stride + * must be 1. + * + * and similar language exists for IVB and SNB. Pre-SNB, math instructions + * are sends, so the sources are moved to MRF's and there are no + * restrictions. + */ + if (inst->is_math()) { + if (devinfo->gen == 6 || devinfo->gen == 7) { + assert(inst->dst.stride == 1); + return stride == 1 || stride == 0; + } else if (devinfo->gen >= 8) { + return stride == inst->dst.stride || stride == 0; + } + } + + return true; +} + bool fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) { @@ -326,7 +379,8 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) /* Bail if the result of composing both strides would exceed the * hardware limit. */ - if (entry->src.stride * inst->src[arg].stride > 4) + if (!can_take_stride(inst, arg, entry->src.stride * inst->src[arg].stride, + devinfo)) return false; /* Bail if the instruction type is larger than the execution type of the |