diff options
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 56 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_lower_regioning.cpp | 6 | ||||
-rw-r--r-- | src/intel/compiler/brw_ir_fs.h | 10 |
3 files changed, 68 insertions, 4 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 5a18ba86a96..4a8f8ea5740 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -251,6 +251,62 @@ fs_inst::is_send_from_grf() const
    }
 }
 
+bool
+fs_inst::is_control_source(unsigned arg) const /* Is source index arg a control source of this opcode? */
+{ /* Callers skip control sources in execution-type and regioning calculations. */
+   switch (opcode) { /* NOTE(review): indices are hard-coded per opcode; keep in sync with each opcode's source layout. */
+   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
+   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
+   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
+      return arg == 0; /* Pull-constant loads: source 0 only. */
+
+   case SHADER_OPCODE_BROADCAST:
+   case SHADER_OPCODE_SHUFFLE:
+   case SHADER_OPCODE_QUAD_SWIZZLE:
+   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
+   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
+   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
+   case SHADER_OPCODE_IMAGE_SIZE:
+   case SHADER_OPCODE_GET_BUFFER_SIZE:
+      return arg == 1; /* The opcodes above: source 1 only. */
+
+   case SHADER_OPCODE_MOV_INDIRECT:
+   case SHADER_OPCODE_CLUSTER_BROADCAST:
+   case SHADER_OPCODE_TEX:
+   case FS_OPCODE_TXB:
+   case SHADER_OPCODE_TXD:
+   case SHADER_OPCODE_TXF:
+   case SHADER_OPCODE_TXF_LZ:
+   case SHADER_OPCODE_TXF_CMS:
+   case SHADER_OPCODE_TXF_CMS_W:
+   case SHADER_OPCODE_TXF_UMS:
+   case SHADER_OPCODE_TXF_MCS:
+   case SHADER_OPCODE_TXL:
+   case SHADER_OPCODE_TXL_LZ:
+   case SHADER_OPCODE_TXS:
+   case SHADER_OPCODE_LOD:
+   case SHADER_OPCODE_TG4:
+   case SHADER_OPCODE_TG4_OFFSET:
+   case SHADER_OPCODE_SAMPLEINFO:
+   case SHADER_OPCODE_UNTYPED_ATOMIC:
+   case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT:
+   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+   case SHADER_OPCODE_BYTE_SCATTERED_READ:
+   case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
+   case SHADER_OPCODE_TYPED_ATOMIC:
+   case SHADER_OPCODE_TYPED_SURFACE_READ:
+   case SHADER_OPCODE_TYPED_SURFACE_WRITE:
+      return arg == 1 || arg == 2; /* The opcodes above: sources 1 and 2. */
+
+   case SHADER_OPCODE_SEND:
+      return arg == 0 || arg == 1; /* SEND: sources 0 and 1. */
+
+   default:
+      return false; /* Any other opcode has no control sources. */
+   }
+}
+
 /**
  * Returns true if this instruction's sources and destinations cannot
  * safely be the same register.
diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp b/src/intel/compiler/brw_fs_lower_regioning.cpp index df50993dee6..6a3c23892b4 100644 --- a/src/intel/compiler/brw_fs_lower_regioning.cpp +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp @@ -74,7 +74,7 @@ namespace { unsigned stride = inst->dst.stride * type_sz(inst->dst.type); for (unsigned i = 0; i < inst->sources; i++) { - if (!is_uniform(inst->src[i])) + if (!is_uniform(inst->src[i]) && !inst->is_control_source(i)) stride = MAX2(stride, inst->src[i].stride * type_sz(inst->src[i].type)); } @@ -92,7 +92,7 @@ namespace { required_dst_byte_offset(const fs_inst *inst) { for (unsigned i = 0; i < inst->sources; i++) { - if (!is_uniform(inst->src[i])) + if (!is_uniform(inst->src[i]) && !inst->is_control_source(i)) if (reg_offset(inst->src[i]) % REG_SIZE != reg_offset(inst->dst) % REG_SIZE) return 0; @@ -109,7 +109,7 @@ namespace { has_invalid_src_region(const gen_device_info *devinfo, const fs_inst *inst, unsigned i) { - if (is_unordered(inst)) { + if (is_unordered(inst) || inst->is_control_source(i)) { return false; } else { const unsigned dst_byte_stride = inst->dst.stride * type_sz(inst->dst.type); diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index d05357e822e..c4427a658b0 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -358,6 +358,13 @@ public: bool has_source_and_destination_hazard() const; /** + * Return whether \p arg is a control source of a virtual instruction which + * shouldn't contribute to the execution type and usual regioning + * restriction calculations of arithmetic instructions. + */ + bool is_control_source(unsigned arg) const; + + /** * Return the subset of flag registers read by the instruction as a bitset * with byte granularity. 
*/ @@ -461,7 +468,8 @@ get_exec_type(const fs_inst *inst) brw_reg_type exec_type = BRW_REGISTER_TYPE_B; for (int i = 0; i < inst->sources; i++) { - if (inst->src[i].file != BAD_FILE) { + if (inst->src[i].file != BAD_FILE && + !inst->is_control_source(i)) { const brw_reg_type t = get_exec_type(inst->src[i].type); if (type_sz(t) > type_sz(exec_type)) exec_type = t; |