summaryrefslogtreecommitdiffstats
path: root/src/amd
diff options
context:
space:
mode:
authorSamuel Pitoiset <[email protected]>2020-04-13 09:57:33 +0200
committerMarge Bot <[email protected]>2020-04-13 11:51:17 +0000
commit729bdc0d70b0661c197e89b372f1b6d15e2acd69 (patch)
treef4902c19f57faec5bed2854e10f3708f117856cf /src/amd
parent6a8e5dde6684c477edddb044a38a6618bf38d014 (diff)
aco: fix f2i64/f2u64 with sgprs if the exponent computation overflow
This fixes f16->{i64,u64} conversions for +0/-0. Signed-off-by: Samuel Pitoiset <[email protected]> Reviewed-by: Daniel Schürmann <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4509>
Diffstat (limited to 'src/amd')
-rw-r--r--src/amd/compiler/aco_instruction_selection.cpp10
1 files changed, 5 insertions, 5 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 7c88d0d75a6..1981b8ec931 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2381,9 +2381,9 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
if (src.type() == RegType::vgpr)
src = bld.as_uniform(src);
Temp exponent = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), src, Operand(0x80017u));
- exponent = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc), exponent, Operand(126u));
- exponent = bld.sop2(aco_opcode::s_max_u32, bld.def(s1), bld.def(s1, scc), Operand(0u), exponent);
- exponent = bld.sop2(aco_opcode::s_min_u32, bld.def(s1), bld.def(s1, scc), Operand(64u), exponent);
+ exponent = bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.def(s1, scc), exponent, Operand(126u));
+ exponent = bld.sop2(aco_opcode::s_max_i32, bld.def(s1), bld.def(s1, scc), Operand(0u), exponent);
+ exponent = bld.sop2(aco_opcode::s_min_i32, bld.def(s1), bld.def(s1, scc), Operand(64u), exponent);
Temp mantissa = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), Operand(0x7fffffu), src);
Temp sign = bld.sop2(aco_opcode::s_ashr_i32, bld.def(s1), bld.def(s1, scc), src, Operand(31u));
mantissa = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), Operand(0x800000u), mantissa);
@@ -2454,8 +2454,8 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
if (src.type() == RegType::vgpr)
src = bld.as_uniform(src);
Temp exponent = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), src, Operand(0x80017u));
- exponent = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc), exponent, Operand(126u));
- exponent = bld.sop2(aco_opcode::s_max_u32, bld.def(s1), bld.def(s1, scc), Operand(0u), exponent);
+ exponent = bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.def(s1, scc), exponent, Operand(126u));
+ exponent = bld.sop2(aco_opcode::s_max_i32, bld.def(s1), bld.def(s1, scc), Operand(0u), exponent);
Temp mantissa = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), Operand(0x7fffffu), src);
mantissa = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), Operand(0x800000u), mantissa);
Temp exponent_small = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc), Operand(24u), exponent);