aboutsummaryrefslogtreecommitdiffstats
path: root/src/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/compiler')
-rw-r--r--src/compiler/nir/nir.h6
-rw-r--r--src/compiler/nir/nir_opcodes.py28
-rw-r--r--src/compiler/nir/nir_opt_algebraic.py15
3 files changed, 18 insertions, 31 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 0b3745be8b1..9feed169e47 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2282,18 +2282,16 @@ typedef struct nir_shader_compiler_options {
bool lower_fmod;
/** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */
bool lower_bitfield_extract;
- /** Lowers ibitfield_extract/ubitfield_extract to bfm, compares, shifts. */
+ /** Lowers ibitfield_extract/ubitfield_extract to compares, shifts. */
bool lower_bitfield_extract_to_shifts;
/** Lowers bitfield_insert to bfi/bfm */
bool lower_bitfield_insert;
- /** Lowers bitfield_insert to bfm, compares, and shifts. */
+ /** Lowers bitfield_insert to compares, and shifts. */
bool lower_bitfield_insert_to_shifts;
/** Lowers bitfield_reverse to shifts. */
bool lower_bitfield_reverse;
/** Lowers bit_count to shifts. */
bool lower_bit_count;
- /** Lowers bfm to shifts and subtracts. */
- bool lower_bfm;
/** Lowers ifind_msb to compare and ufind_msb */
bool lower_ifind_msb;
/** Lowers find_lsb to ufind_msb and logic ops */
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 1ab4a3e7a31..8771c74b033 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -787,14 +787,12 @@ binop_convert("pack_32_2x16_split", tuint32, tuint16, "",
"src0 | ((uint32_t)src1 << 16)")
# bfm implements the behavior of the first operation of the SM5 "bfi" assembly
-# and that of the "bfi1" i965 instruction. That is, it has undefined behavior
-# if either of its arguments are 32.
+# and that of the "bfi1" i965 instruction. That is, the bits and offset values
+# are from the low five bits of src0 and src1, respectively.
binop_convert("bfm", tuint32, tint32, "", """
-int bits = src0, offset = src1;
-if (offset < 0 || bits < 0 || offset > 31 || bits > 31 || offset + bits > 32)
- dst = 0; /* undefined */
-else
- dst = ((1u << bits) - 1) << offset;
+int bits = src0 & 0x1F;
+int offset = src1 & 0x1F;
+dst = ((1u << bits) - 1) << offset;
""")
opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint32], False, "", """
@@ -873,15 +871,14 @@ if (mask == 0) {
}
""")
-# SM5 ubfe/ibfe assembly
+# SM5 ubfe/ibfe assembly: only the 5 least significant bits of offset and bits are used.
opcode("ubfe", 0, tuint32,
- [0, 0, 0], [tuint32, tint32, tint32], False, "", """
+ [0, 0, 0], [tuint32, tuint32, tuint32], False, "", """
unsigned base = src0;
-int offset = src1, bits = src2;
+unsigned offset = src1 & 0x1F;
+unsigned bits = src2 & 0x1F;
if (bits == 0) {
dst = 0;
-} else if (bits < 0 || offset < 0) {
- dst = 0; /* undefined */
} else if (offset + bits < 32) {
dst = (base << (32 - bits - offset)) >> (32 - bits);
} else {
@@ -889,13 +886,12 @@ if (bits == 0) {
}
""")
opcode("ibfe", 0, tint32,
- [0, 0, 0], [tint32, tint32, tint32], False, "", """
+ [0, 0, 0], [tint32, tuint32, tuint32], False, "", """
int base = src0;
-int offset = src1, bits = src2;
+unsigned offset = src1 & 0x1F;
+unsigned bits = src2 & 0x1F;
if (bits == 0) {
dst = 0;
-} else if (bits < 0 || offset < 0) {
- dst = 0; /* undefined */
} else if (offset + bits < 32) {
dst = (base << (32 - bits - offset)) >> (32 - bits);
} else {
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 36e576b7eff..c7e69c0310c 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -794,18 +794,11 @@ optimizations.extend([
# Alternative lowering that doesn't rely on bfi.
(('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
- ('bcsel', ('ilt', 31, 'bits'),
- 'insert',
- ('ior',
- ('iand', 'base', ('inot', ('bfm', 'bits', 'offset'))),
- ('iand', ('ishl', 'insert', 'offset'), ('bfm', 'bits', 'offset')))),
+ ('ior',
+ ('iand', 'base', ('inot', ('ishl', ('isub', ('ishl', 1, 'bits'), 1), 'offset'))),
+ ('iand', ('ishl', 'insert', 'offset'), ('ishl', ('isub', ('ishl', 1, 'bits'), 1), 'offset'))),
'options->lower_bitfield_insert_to_shifts'),
- # bfm lowering -- note that the NIR opcode is undefined if either arg is 32.
- (('bfm', 'bits', 'offset'),
- ('ishl', ('isub', ('ishl', 1, 'bits'), 1), 'offset'),
- 'options->lower_bfm'),
-
(('ibitfield_extract', 'value', 'offset', 'bits'),
('bcsel', ('ilt', 31, 'bits'), 'value',
('ibfe', 'value', 'offset', 'bits')),
@@ -829,7 +822,7 @@ optimizations.extend([
('ushr', 'value', 'offset'),
('bcsel', ('ieq', 'bits', 32),
0xffffffff,
- ('bfm', 'bits', 0))),
+ ('isub', ('ishl', 1, 'bits'), 1))),
'options->lower_bitfield_extract_to_shifts'),
(('ifind_msb', 'value'),