diff options
author | Rhys Perry <pendingchaos02@gmail.com> | 2020-05-19 11:53:44 +0100 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2020-06-15 18:24:22 +0000 |
commit | b6d9e45f473edf4a3cfa86963b1849365f2297b1 (patch) | |
tree | aa1e5db948ec5bc91684e4a09c28f83290d9d004 /src/amd | |
parent | 34d481fd1f689805e0f41a8907bd00f96270fbfc (diff) |
aco: improve code for f2{i,u}{8,16}
Use sub-dword definitions so that the RA can use SDWA
No fossil-db changes.
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5245>
Diffstat (limited to 'src/amd')
-rw-r--r-- | src/amd/compiler/aco_instruction_selection.cpp | 24 |
1 files changed, 11 insertions, 13 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 38bf449c83b..0b59a7e2e91 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2327,33 +2327,31 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
    case nir_op_f2i8:
    case nir_op_f2i16: {
       Temp src = get_alu_src(ctx, instr->src[0]);
+      Temp tmp = dst.type() == RegType::vgpr ? dst : bld.tmp(v1);
       if (instr->src[0].src.ssa->bit_size == 16)
-         src = bld.vop1(aco_opcode::v_cvt_i16_f16, bld.def(v1), src);
+         src = bld.vop1(aco_opcode::v_cvt_i16_f16, Definition(tmp), src);
       else if (instr->src[0].src.ssa->bit_size == 32)
-         src = bld.vop1(aco_opcode::v_cvt_i32_f32, bld.def(v1), src);
+         src = bld.vop1(aco_opcode::v_cvt_i32_f32, Definition(tmp), src);
       else
-         src = bld.vop1(aco_opcode::v_cvt_i32_f64, bld.def(v1), src);
+         src = bld.vop1(aco_opcode::v_cvt_i32_f64, Definition(tmp), src);
 
-      if (dst.type() == RegType::vgpr)
-         bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand(0u));
-      else
+      if (dst.type() != RegType::vgpr)
          bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), src);
       break;
    }
    case nir_op_f2u8:
    case nir_op_f2u16: {
       Temp src = get_alu_src(ctx, instr->src[0]);
+      Temp tmp = dst.type() == RegType::vgpr ? dst : bld.tmp(v1);
       if (instr->src[0].src.ssa->bit_size == 16)
-         src = bld.vop1(aco_opcode::v_cvt_u16_f16, bld.def(v1), src);
+         bld.vop1(aco_opcode::v_cvt_u16_f16, Definition(tmp), src);
       else if (instr->src[0].src.ssa->bit_size == 32)
-         src = bld.vop1(aco_opcode::v_cvt_u32_f32, bld.def(v1), src);
+         bld.vop1(aco_opcode::v_cvt_u32_f32, Definition(tmp), src);
       else
-         src = bld.vop1(aco_opcode::v_cvt_u32_f64, bld.def(v1), src);
+         bld.vop1(aco_opcode::v_cvt_u32_f64, Definition(tmp), src);
 
-      if (dst.type() == RegType::vgpr)
-         bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand(0u));
-      else
-         bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), src);
+      if (dst.type() != RegType::vgpr)
+         bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), tmp);
       break;
    }
    case nir_op_f2i32: {