aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- src/compiler/nir/nir.h 3
-rw-r--r-- src/compiler/nir/nir_opt_algebraic.py 32
-rw-r--r-- src/intel/compiler/brw_compiler.c 1
3 files changed, 36 insertions, 0 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 9f6d2c5895b..04080b5e1df 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3019,6 +3019,9 @@ typedef struct nir_shader_compiler_options {
*/
bool intel_vec4;
+ /** Lower nir_op_ibfe and nir_op_ubfe that have two constant sources. */
+ bool lower_bfe_with_two_constants;
+
/** Whether 8-bit ALU is supported. */
bool support_8bit_alu;
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 1f8f6fd1d6f..2d74a85711d 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -1898,6 +1898,38 @@ late_optimizations = [
# any conversions that could have been removed will have been removed in
# nir_opt_algebraic so any remaining ones are required.
(('f2fmp', a), ('f2f16', a)),
+
+ # Section 8.8 (Integer Functions) of the GLSL 4.60 spec says:
+ #
+ # If bits is zero, the result will be zero.
+ #
+ # These prevent the next two lowerings from generating incorrect results
+ # when the bit count (the third source) is zero.
+ (('ubfe', a, b, 0), 0),
+ (('ibfe', a, b, 0), 0),
+
+ # On Intel GPUs, BFE is a 3-source instruction. Like all 3-source
+ # instructions on Intel GPUs, it cannot have immediate values as
+ # sources. There are also limitations on source register strides. As a
+ # result, it is very easy for a 3-source instruction combined with either
+ # loads of immediate values or copies from weird register strides to be
+ # more expensive than the primitive instructions it represents.
+ (('ubfe', a, '#b', '#c'), ('iand', ('ushr', 0xffffffff, ('ineg', c)), ('ushr', a, b)), 'options->lower_bfe_with_two_constants'),
+
+ # b is the lowest order bit to be extracted and c is the number of bits to
+ # extract. The inner shift removes the bits above b + c by shifting left
+ # 32 - (b + c). ishl only sees the low 5 bits of the shift count, so a
+ # shift count of -(b + c) is equivalent. The outer shift moves the bit
+ # that was at b to bit zero. After the first shift, that bit is now at
+ # b + (32 - (b + c)) or 32 - c. This means that it must be shifted right
+ # by 32 - c bits, which is the same as -c in the low 5 bits.
+ (('ibfe', a, '#b', '#c'), ('ishr', ('ishl', a, ('ineg', ('iadd', b, c))), ('ineg', c)), 'options->lower_bfe_with_two_constants'),
+
+ # Clean up no-op shifts that may result from the bfe lowerings.
+ (('ishl', a, 0), a),
+ (('ishl', a, -32), a),
+ (('ishr', a, 0), a),
+ (('ishr', a, -32), a),
+ (('ushr', a, 0), a),
]
for op in ['fadd']:
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index cede7b44879..0c28b81e912 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -66,6 +66,7 @@
.lower_unpack_unorm_4x8 = true, \
.lower_usub_sat64 = true, \
.lower_hadd64 = true, \
+ .lower_bfe_with_two_constants = true, \
.max_unroll_iterations = 32
static const struct nir_shader_compiler_options scalar_nir_options = {