aboutsummaryrefslogtreecommitdiffstats
path: root/src/compiler/nir/nir_opcodes.py
diff options
context:
space:
mode:
authorRob Clark <[email protected]>2019-03-09 17:17:55 +0100
committerKarol Herbst <[email protected]>2019-12-21 11:00:17 +0000
commita8ec4082a41830cf67a4fd405402fd2d820722fd (patch)
treee2fc5c64508ce10f2402a7524eb65cb7a53e1199 /src/compiler/nir/nir_opcodes.py
parentb35e583c17c647dd5605220ef0e8db28b879aae0 (diff)
nir+vtn: vec8+vec16 support
This introduces new vec8 and vec16 instructions (which are the only instructions taking more than 4 sources), in order to construct 8 and 16 component vectors.

In order to avoid fixing up the non-autogenerated nir_build_alu() sites and making them pass 16 src args for the benefit of the two instructions that take more than 4 srcs (i.e. vec8 and vec16), nir_build_alu() has nir_build_alu_tail() split out and re-used by nir_build_alu2() (which is used for the > 4 src args case).

v2 (Karol Herbst):
- use nir_build_alu2 for vec8 and vec16
- use python's array multiplication syntax
- add nir_op_vec helper
- simplify nir_vec
- nir_build_alu_tail -> nir_builder_alu_instr_finish_and_insert
- use nir_build_alu for opcodes with <= 4 sources
v3 (Karol Herbst):
- fix nir_serialize
v4 (Dave Airlie):
- fix serialization of glsl_type
- handle vec8/16 in lowering of bools
v5 (Karol Herbst):
- fix load store vectorizer

Signed-off-by: Karol Herbst <[email protected]>
Reviewed-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src/compiler/nir/nir_opcodes.py')
-rw-r--r-- src/compiler/nir/nir_opcodes.py 36
1 files changed, 35 insertions, 1 deletions
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 2ab04ed9b1d..86485e39508 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -75,7 +75,7 @@ class Opcode(object):
assert isinstance(algebraic_properties, str)
assert isinstance(const_expr, str)
assert len(input_sizes) == len(input_types)
- assert 0 <= output_size <= 4
+ assert 0 <= output_size <= 4 or (output_size == 8) or (output_size == 16)
for size in input_sizes:
assert 0 <= size <= 4
if output_size != 0:
@@ -1057,6 +1057,40 @@ dst.z = src2.x;
dst.w = src3.x;
""")
+opcode("vec8", 8, tuint,
+ [1] * 8, [tuint] * 8,
+ False, "", """
+dst.x = src0.x;
+dst.y = src1.x;
+dst.z = src2.x;
+dst.w = src3.x;
+dst.e = src4.x;
+dst.f = src5.x;
+dst.g = src6.x;
+dst.h = src7.x;
+""")
+
+opcode("vec16", 16, tuint,
+ [1] * 16, [tuint] * 16,
+ False, "", """
+dst.x = src0.x;
+dst.y = src1.x;
+dst.z = src2.x;
+dst.w = src3.x;
+dst.e = src4.x;
+dst.f = src5.x;
+dst.g = src6.x;
+dst.h = src7.x;
+dst.i = src8.x;
+dst.j = src9.x;
+dst.k = src10.x;
+dst.l = src11.x;
+dst.m = src12.x;
+dst.n = src13.x;
+dst.o = src14.x;
+dst.p = src15.x;
+""")
+
# An integer multiply instruction for address calculation. This is
# similar to imul, except that the results are undefined in case of
# overflow. Overflow is defined according to the size of the variable