aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorMatt Turner <[email protected]>2016-01-25 11:05:52 -0800
committerMatt Turner <[email protected]>2016-02-01 10:43:57 -0800
commit9b8786eba95532d53c45d52059a44abd7ee93530 (patch)
tree490d7b4a39bf10942937cf47cbae0fbd05b56543 /src
parent1dc312e295c66ab8674d2f47f859e310f607b2ed (diff)
nir: Add lowering support for packing opcodes.
Reviewed-by: Iago Toral Quiroga <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/compiler/nir/nir.h4
-rw-r--r--src/compiler/nir/nir_lower_alu_to_scalar.c32
-rw-r--r--src/compiler/nir/nir_opcodes.py10
-rw-r--r--src/compiler/nir/nir_opt_algebraic.py20
4 files changed, 66 insertions, 0 deletions
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index fa53d818fb7..da55affe388 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1469,6 +1469,10 @@ typedef struct nir_shader_compiler_options {
bool lower_ffract;
bool lower_pack_half_2x16;
+ bool lower_pack_unorm_2x16;
+ bool lower_pack_snorm_2x16;
+ bool lower_pack_unorm_4x8;
+ bool lower_pack_snorm_4x8;
bool lower_unpack_half_2x16;
bool lower_extract_byte;
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index 5372fbeed88..312d2f99a1c 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -134,6 +134,38 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
return;
}
+ case nir_op_pack_uvec2_to_uint: {
+ assert(b->shader->options->lower_pack_snorm_2x16 ||
+ b->shader->options->lower_pack_unorm_2x16);
+
+ nir_ssa_def *word =
+ nir_extract_u16(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
+ nir_ssa_def *val =
+ nir_ior(b, nir_ishl(b, nir_channel(b, word, 1), nir_imm_int(b, 16)),
+ nir_channel(b, word, 0));
+
+ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
+ nir_instr_remove(&instr->instr);
+ break;
+ }
+
+ case nir_op_pack_uvec4_to_uint: {
+ assert(b->shader->options->lower_pack_snorm_4x8 ||
+ b->shader->options->lower_pack_unorm_4x8);
+
+ nir_ssa_def *byte =
+ nir_extract_u8(b, instr->src[0].src.ssa, nir_imm_int(b, 0));
+ nir_ssa_def *val =
+ nir_ior(b, nir_ior(b, nir_ishl(b, nir_channel(b, byte, 3), nir_imm_int(b, 24)),
+ nir_ishl(b, nir_channel(b, byte, 2), nir_imm_int(b, 16))),
+ nir_ior(b, nir_ishl(b, nir_channel(b, byte, 1), nir_imm_int(b, 8)),
+ nir_channel(b, byte, 0)));
+
+ nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val));
+ nir_instr_remove(&instr->instr);
+ break;
+ }
+
case nir_op_fdph: {
nir_ssa_def *sum[4];
for (unsigned i = 0; i < 3; i++) {
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 5f5a0efd2d6..a37fe2dc060 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -237,6 +237,16 @@ unpack_2x16("unorm")
unpack_4x8("unorm")
unpack_2x16("half")
+unop_horiz("pack_uvec2_to_uint", 1, tuint, 2, tuint, """
+dst.x = (src0.x & 0xffff) | (src0.y << 16);
+""")  # .x -> bits 0..15, .y -> bits 16..31 (same layout the scalar lowering builds)
+
+unop_horiz("pack_uvec4_to_uint", 1, tuint, 4, tuint, """
+dst.x = ((src0.x & 0xff) << 0) |
+ ((src0.y & 0xff) << 8) |
+ ((src0.z & 0xff) << 16) |
+ ((src0.w & 0xff) << 24);
+""")  # only the low 8 bits of each component are packed, matching the extract_u8-based lowering
# Lowered floating point unpacking operations.
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 50d37ea37f1..a279fbc482f 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -258,6 +258,26 @@ optimizations = [
(('extract_u16', a, b),
('iand', ('ushr', a, ('imul', b, 16)), 0xffff),
'options->lower_extract_word'),
+
+ (('pack_unorm_2x16', 'v'),
+ ('pack_uvec2_to_uint',
+ ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))),
+ 'options->lower_pack_unorm_2x16'),
+
+ (('pack_unorm_4x8', 'v'),
+ ('pack_uvec4_to_uint',
+ ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))),
+ 'options->lower_pack_unorm_4x8'),
+
+ (('pack_snorm_2x16', 'v'),
+ ('pack_uvec2_to_uint',
+ ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 32767.0)))),
+ 'options->lower_pack_snorm_2x16'),
+
+ (('pack_snorm_4x8', 'v'),
+ ('pack_uvec4_to_uint',
+ ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))),
+ 'options->lower_pack_snorm_4x8'),
]
# Add optimizations to handle the case where the result of a ternary is