diff options
author | Samuel Iglesias Gonsálvez <[email protected]> | 2017-01-20 08:47:05 +0100 |
---|---|---|
committer | Francisco Jerez <[email protected]> | 2017-04-14 14:56:07 -0700 |
commit | aeecc82d057adf43189d08214b21ca5166ad9682 (patch) | |
tree | e1eb456a4d5371a10d853d12c1da9468cc968d97 /src/intel | |
parent | 94ffeb7fa2257af3eb416a1e720e08911835665b (diff) |
i965/fs: generalize the legalization d2x pass
Generalize it to lower any unsupported narrower conversion.
v2 (Curro):
- Add supports_type_conversion()
- Reuse existing intruction instead of cloning it.
- Generalize d2x to narrower and equal size conversions.
v3 (Curro):
- Make supports_type_conversion() const and improve it.
- Use foreach_block_and_inst to process added instructions.
- Simplify code.
- Add assert and improve comments.
- Remove redundant mov.
- Remove useless comment.
- Remove saturate == false assert and add support for saturation
when fixing the conversion.
- Add get_exec_type() function.
v4 (Curro):
- Use get_exec_type() function to get sources' type.
Signed-off-by: Samuel Iglesias Gonsálvez <[email protected]>
Reviewed-by: Francisco Jerez <[email protected]>
Diffstat (limited to 'src/intel')
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 11 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_lower_d2x.cpp | 93 |
2 files changed, 67 insertions, 37 deletions
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 086b1a04855..8eb8789905c 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -5694,11 +5694,6 @@ fs_visitor::optimize() OPT(dead_code_eliminate); } - if (OPT(lower_d2x)) { - OPT(opt_copy_propagation); - OPT(dead_code_eliminate); - } - OPT(lower_simd_width); /* After SIMD lowering just in case we had to unroll the EOT send. */ @@ -5745,6 +5740,12 @@ fs_visitor::optimize() OPT(dead_code_eliminate); } + if (OPT(lower_d2x)) { + OPT(opt_copy_propagation); + OPT(dead_code_eliminate); + OPT(lower_simd_width); + } + lower_uniform_pull_constant_loads(); validate(); diff --git a/src/intel/compiler/brw_fs_lower_d2x.cpp b/src/intel/compiler/brw_fs_lower_d2x.cpp index a2db1154615..bc316360d05 100644 --- a/src/intel/compiler/brw_fs_lower_d2x.cpp +++ b/src/intel/compiler/brw_fs_lower_d2x.cpp @@ -27,48 +27,77 @@ using namespace brw; +static bool +supports_type_conversion(const fs_inst *inst) { + switch (inst->opcode) { + case BRW_OPCODE_MOV: + case SHADER_OPCODE_MOV_INDIRECT: + return true; + case BRW_OPCODE_SEL: + return inst->dst.type == get_exec_type(inst); + default: + /* FIXME: We assume the opcodes don't explicitly mentioned + * before just work fine with arbitrary conversions. + */ + return true; + } +} + bool fs_visitor::lower_d2x() { bool progress = false; - foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { - if (inst->opcode != BRW_OPCODE_MOV) - continue; - - if (inst->dst.type != BRW_REGISTER_TYPE_F && - inst->dst.type != BRW_REGISTER_TYPE_D && - inst->dst.type != BRW_REGISTER_TYPE_UD) - continue; + foreach_block_and_inst(block, fs_inst, inst, cfg) { + const fs_builder ibld(this, block, inst); + fs_reg dst = inst->dst; + bool saturate = inst->saturate; - if (inst->src[0].type != BRW_REGISTER_TYPE_DF && - inst->src[0].type != BRW_REGISTER_TYPE_UQ && - inst->src[0].type != BRW_REGISTER_TYPE_Q) - continue; + if (supports_type_conversion(inst)) { + if (get_exec_type_size(inst) == 8 && type_sz(inst->dst.type) < 8) { + /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to + * Single Precision Float": + * + * The upper Dword of every Qword will be written with undefined + * value when converting DF to F. + * + * So we need to allocate a temporary that's two registers, and then do + * a strided MOV to get the lower DWord of every Qword that has the + * result. + */ + fs_reg temp = ibld.vgrf(get_exec_type(inst)); + fs_reg strided_temp = subscript(temp, dst.type, 0); - assert(inst->dst.file == VGRF); - assert(inst->saturate == false); - fs_reg dst = inst->dst; + assert(inst->size_written == inst->dst.component_size(inst->exec_size)); + inst->dst = strided_temp; + inst->saturate = false; + /* As it is an strided destination, we write n-times more being n the + * size ratio between source and destination types. Update + * size_written accordingly. + */ + inst->size_written = inst->dst.component_size(inst->exec_size); + ibld.at(block, inst->next).MOV(dst, strided_temp)->saturate = saturate; - const fs_builder ibld(this, block, inst); + progress = true; + } + } else { + fs_reg temp0 = ibld.vgrf(get_exec_type(inst)); - /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to - * Single Precision Float": - * - * The upper Dword of every Qword will be written with undefined - * value when converting DF to F. - * - * So we need to allocate a temporary that's two registers, and then do - * a strided MOV to get the lower DWord of every Qword that has the - * result. - */ - fs_reg temp = ibld.vgrf(inst->src[0].type, 1); - fs_reg strided_temp = subscript(temp, inst->dst.type, 0); - ibld.MOV(strided_temp, inst->src[0]); - ibld.MOV(dst, strided_temp); + assert(inst->size_written == inst->dst.component_size(inst->exec_size)); + inst->dst = temp0; + /* As it is an strided destination, we write n-times more being n the + * size ratio between source and destination types. Update + * size_written accordingly. + */ + inst->size_written = inst->dst.component_size(inst->exec_size); + inst->saturate = false; + /* Now, do the conversion to original destination's type. In next iteration, + * we will lower it if it is a d2f conversion. + */ + ibld.at(block, inst->next).MOV(dst, temp0)->saturate = saturate; - inst->remove(block); - progress = true; + progress = true; + } } if (progress) |