diff options
author | Rob Clark <[email protected]> | 2019-03-09 17:17:55 +0100 |
---|---|---|
committer | Karol Herbst <[email protected]> | 2019-12-21 11:00:17 +0000 |
commit | a8ec4082a41830cf67a4fd405402fd2d820722fd (patch) | |
tree | e2fc5c64508ce10f2402a7524eb65cb7a53e1199 /src/compiler/nir/nir_opcodes.py | |
parent | b35e583c17c647dd5605220ef0e8db28b879aae0 (diff) |
nir+vtn: vec8+vec16 support
This introduces new vec8 and vec16 instructions (which are the only
instructions taking more than 4 sources), in order to construct 8 and 16
component vectors.
In order to avoid fixing up the non-autogenerated nir_build_alu() sites
and making them pass 16 src args for the benefit of the two instructions
that take more than 4 srcs (ie vec8 and vec16), nir_build_alu() is has
nir_build_alu_tail() split out and re-used by nir_build_alu2() (which is
used for the > 4 src args case).
v2 (Karol Herbst):
use nir_build_alu2 for vec8 and vec16
use python's array multiplication syntax
add nir_op_vec helper
simplify nir_vec
nir_build_alu_tail -> nir_builder_alu_instr_finish_and_insert
use nir_build_alu for opcodes with <= 4 sources
v3 (Karol Herbst):
fix nir_serialize
v4 (Dave Airlie):
fix serialization of glsl_type
handle vec8/16 in lowering of bools
v5 (Karol Herbst):
fix load store vectorizer
Signed-off-by: Karol Herbst <[email protected]>
Reviewed-by: Dave Airlie <[email protected]>
Diffstat (limited to 'src/compiler/nir/nir_opcodes.py')
-rw-r--r-- | src/compiler/nir/nir_opcodes.py | 36 |
1 files changed, 35 insertions, 1 deletions
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index 2ab04ed9b1d..86485e39508 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -75,7 +75,7 @@ class Opcode(object): assert isinstance(algebraic_properties, str) assert isinstance(const_expr, str) assert len(input_sizes) == len(input_types) - assert 0 <= output_size <= 4 + assert 0 <= output_size <= 4 or (output_size == 8) or (output_size == 16) for size in input_sizes: assert 0 <= size <= 4 if output_size != 0: @@ -1057,6 +1057,40 @@ dst.z = src2.x; dst.w = src3.x; """) +opcode("vec8", 8, tuint, + [1] * 8, [tuint] * 8, + False, "", """ +dst.x = src0.x; +dst.y = src1.x; +dst.z = src2.x; +dst.w = src3.x; +dst.e = src4.x; +dst.f = src5.x; +dst.g = src6.x; +dst.h = src7.x; +""") + +opcode("vec16", 16, tuint, + [1] * 16, [tuint] * 16, + False, "", """ +dst.x = src0.x; +dst.y = src1.x; +dst.z = src2.x; +dst.w = src3.x; +dst.e = src4.x; +dst.f = src5.x; +dst.g = src6.x; +dst.h = src7.x; +dst.i = src8.x; +dst.j = src9.x; +dst.k = src10.x; +dst.l = src11.x; +dst.m = src12.x; +dst.n = src13.x; +dst.o = src14.x; +dst.p = src15.x; +""") + # An integer multiply instruction for address calculation. This is # similar to imul, except that the results are undefined in case of # overflow. Overflow is defined according to the size of the variable |