nir: Change bfm's semantics to match Intel/AMD/SM5.

Intel/AMD's hardware instructions do not handle arguments of 32. Constant evaluation should not produce a result different from the hardware instruction.

The s/1ull/1u/ change is intentional: previously we wanted defined behavior for the "1 << 32" case, but we're making this case undefined so we can make it 1u and save ourselves a 64-bit operation.

Reviewed-by: Ian Romanick <[email protected]>
author: Matt Turner <[email protected]> 2016-01-07 16:16:35 -0800
committer: Matt Turner <[email protected]> 2016-01-13 11:22:40 -0800
commit: 74cff779eb5217fe2b791f2a23405096901f45fd (patch)
tree: 7ec2648c95a5ea3aaf12c8184cab1a2bc3a671b6 /src
parent: a5fcff6628c641d01954d0af4aee0e723a570cad (diff)
1 files changed, 6 insertions, 3 deletions
diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
index d7ba0b62375..1c65def691a 100644
--- a/src/glsl/nir/nir_opcodes.py
+++ b/src/glsl/nir/nir_opcodes.py
@@ -511,12 +511,15 @@ binop("fpow", tfloat, "", "powf(src0, src1)")
 binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat,
             "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
 
+# bfm implements the behavior of the first operation of the SM5 "bfi" assembly
+# and that of the "bfi1" i965 instruction. That is, it has undefined behavior
+# if either of its arguments is 32.
 binop_convert("bfm", tuint, tint, "", """
 int bits = src0, offset = src1;
-if (offset < 0 || bits < 0 || offset + bits > 32)
-   dst = 0; /* undefined per the spec */
+if (offset < 0 || bits < 0 || offset > 31 || bits > 31 || offset + bits > 32)
+   dst = 0; /* undefined */
 else
-   dst = ((1ull << bits) - 1) << offset;
+   dst = ((1u << bits) - 1) << offset;
 """)
 
 opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """
author	Matt Turner <[email protected]>	2016-01-07 16:16:35 -0800
committer	Matt Turner <[email protected]>	2016-01-13 11:22:40 -0800
commit	74cff779eb5217fe2b791f2a23405096901f45fd (patch)
tree	7ec2648c95a5ea3aaf12c8184cab1a2bc3a671b6 /src
parent	a5fcff6628c641d01954d0af4aee0e723a570cad (diff)