diff options
author | Matt Turner <[email protected]> | 2016-01-13 11:09:11 -0800 |
---|---|---|
committer | Matt Turner <[email protected]> | 2016-01-14 09:28:01 -0800 |
commit | b82e26a6a4d6baf121f44c61c862bfa79ba0d172 (patch) | |
tree | d8c97faf241b7ea0bd07ceccde1f59f5e7058986 /src | |
parent | 15640ee77ae601cba33cbbc72256e55e03a363e5 (diff) |
nir: Lower bitfield_extract.
The OpenGL specifications for bitfieldExtract() says:
The result will be undefined if <offset> or <bits> is negative, or if
the sum of <offset> and <bits> is greater than the number of bits
used to store the operand.
Therefore passing bits=32, offset=0 is legal and defined in GLSL.
But the earlier SM5 ubfe/ibfe opcodes are specified to accept a bitfield width
ranging from 0-31. As such, Intel and AMD instructions read only the low 5 bits
of the width operand, making them not able to implement the GLSL-specified
behavior directly.
This commit adds ubfe/ibfe operations from SM5 and a lowering pass for
bitfield_extract to to handle the trivial case of <bits> = 32 as
bitfieldExtract:
bits > 31 ? value : bfe(value, offset, bits)
Fixes:
ES31-CTS.shader_bitfield_operation.bitfieldExtract.uvec3_0
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92595
Reviewed-by: Connor Abbott <[email protected]>
Tested-by: Marta Lofstedt <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/glsl/nir/nir.h | 1 | ||||
-rw-r--r-- | src/glsl/nir/nir_opcodes.py | 31 | ||||
-rw-r--r-- | src/glsl/nir/nir_opt_algebraic.py | 10 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 3 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader.cpp | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 3 |
6 files changed, 49 insertions, 0 deletions
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 23aec694d95..11add65988c 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1447,6 +1447,7 @@ typedef struct nir_shader_compiler_options { bool lower_fsat; bool lower_fsqrt; bool lower_fmod; + bool lower_bitfield_extract; bool lower_bitfield_insert; bool lower_uadd_carry; bool lower_usub_borrow; diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py index 3e43438ff67..e79810c1991 100644 --- a/src/glsl/nir/nir_opcodes.py +++ b/src/glsl/nir/nir_opcodes.py @@ -573,6 +573,37 @@ if (mask == 0) { } """) +# SM5 ubfe/ibfe assembly +opcode("ubfe", 0, tuint, + [0, 0, 0], [tuint, tint, tint], "", """ +unsigned base = src0; +int offset = src1, bits = src2; +if (bits == 0) { + dst = 0; +} else if (bits < 0 || offset < 0) { + dst = 0; /* undefined */ +} else if (offset + bits < 32) { + dst = (base << (32 - bits - offset)) >> (32 - bits); +} else { + dst = base >> offset; +} +""") +opcode("ibfe", 0, tint, + [0, 0, 0], [tint, tint, tint], "", """ +int base = src0; +int offset = src1, bits = src2; +if (bits == 0) { + dst = 0; +} else if (bits < 0 || offset < 0) { + dst = 0; /* undefined */ +} else if (offset + bits < 32) { + dst = (base << (32 - bits - offset)) >> (32 - bits); +} else { + dst = base >> offset; +} +""") + +# GLSL bitfieldExtract() opcode("ubitfield_extract", 0, tuint, [0, 0, 0], [tuint, tint, tint], "", """ unsigned base = src0; diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 0d31e39ea8e..7745b76f7ce 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -232,6 +232,16 @@ optimizations = [ ('bcsel', ('ilt', 31, 'bits'), 'insert', ('bfi', ('bfm', 'bits', 'offset'), 'insert', 'base')), 'options->lower_bitfield_insert'), + + (('ibitfield_extract', 'value', 'offset', 'bits'), + ('bcsel', ('ilt', 31, 'bits'), 'value', + ('ibfe', 'value', 'offset', 'bits')), + 'options->lower_bitfield_extract'), + + (('ubitfield_extract', 'value', 'offset', 'bits'), + ('bcsel', ('ult', 31, 'bits'), 'value', + ('ubfe', 'value', 'offset', 'bits')), + 'options->lower_bitfield_extract'), ] # Add optimizations to handle the case where the result of a ternary is diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 874092558e0..d7bcc1c5374 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -1027,6 +1027,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_ubitfield_extract: case nir_op_ibitfield_extract: + unreachable("should have been lowered"); + case nir_op_ubfe: + case nir_op_ibfe: bld.BFE(result, op[2], op[1], op[0]); break; case nir_op_bfm: diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 0ac3f4a30fc..3a69c23446c 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -106,6 +106,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) nir_options->lower_fdiv = true; nir_options->lower_scmp = true; nir_options->lower_fmod = true; + nir_options->lower_bitfield_extract = true; nir_options->lower_bitfield_insert = true; nir_options->lower_uadd_carry = true; nir_options->lower_usub_borrow = true; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index ecca16663cf..0ae723f07e9 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -1385,6 +1385,9 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) case nir_op_ubitfield_extract: case nir_op_ibitfield_extract: + unreachable("should have been lowered"); + case nir_op_ubfe: + case nir_op_ibfe: op[0] = fix_3src_operand(op[0]); op[1] = fix_3src_operand(op[1]); op[2] = fix_3src_operand(op[2]); |