diff options
author | Ian Romanick <[email protected]> | 2016-09-01 15:21:04 -0700 |
---|---|---|
committer | Ian Romanick <[email protected]> | 2017-01-20 15:41:23 -0800 |
commit | 3460d05a718f3859a77fe100f3972095d194be26 (patch) | |
tree | 0367cda6b97e8afd4ef5a23a1bf88381e9e356fd | |
parent | 3ca0029a0dc7b2ed2c61de72ef16e7ad1831c101 (diff) |
nir: Add 64-bit integer support for conversions and bitcasts
v2 (idr): "cut them down later" => Remove ir_unop_b2u64 and
ir_unop_u642b. Handle these with extra i2u or u2i casts just like
uint(bool) and bool(uint) conversions are done.
v3 (idr): Make the "from" type in a cast unsized. This reduces the
number of required cast operations at the expense of slightly more
complex code. However, this will be a dramatic improvement when other
sized integer types are added. Suggested by Connor.
Signed-off-by: Ian Romanick <[email protected]>
Reviewed-by: Connor Abbott <[email protected]>
-rw-r--r-- | src/compiler/glsl/glsl_to_nir.cpp | 37 | ||||
-rw-r--r-- | src/compiler/nir/nir_lower_alu_to_scalar.c | 1 | ||||
-rw-r--r-- | src/compiler/nir/nir_opcodes.py | 30 |
3 files changed, 67 insertions, 1 deletions
diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index f4c3d01e723..7a1c1f4c8a2 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -1480,6 +1480,7 @@ nir_visitor::visit(ir_expression *ir) case ir_unop_f2b: result = nir_f2b(&b, srcs[0]); break; case ir_unop_i2b: result = nir_i2b(&b, srcs[0]); break; case ir_unop_b2i: result = nir_b2i(&b, srcs[0]); break; + case ir_unop_b2i64:result = nir_b2i64(&b, srcs[0]); break; case ir_unop_d2f: result = nir_d2f(&b, srcs[0]); break; case ir_unop_f2d: result = nir_f2d(&b, srcs[0]); break; case ir_unop_d2i: result = nir_d2i(&b, srcs[0]); break; @@ -1493,12 +1494,40 @@ nir_visitor::visit(ir_expression *ir) assert(supports_ints); result = nir_u2d(&b, srcs[0]); break; + case ir_unop_i642i: result = nir_i2i32(&b, srcs[0]); break; + case ir_unop_i642u: result = nir_i2u32(&b, srcs[0]); break; + case ir_unop_i642f: result = nir_i642f(&b, srcs[0]); break; + case ir_unop_i642d: result = nir_i642d(&b, srcs[0]); break; + + case ir_unop_u642i: result = nir_u2i32(&b, srcs[0]); break; + case ir_unop_u642u: result = nir_u2u32(&b, srcs[0]); break; + case ir_unop_u642f: result = nir_u642f(&b, srcs[0]); break; + case ir_unop_u642d: result = nir_u642d(&b, srcs[0]); break; + + case ir_unop_i2i64: result = nir_i2i64(&b, srcs[0]); break; + case ir_unop_u2i64: result = nir_u2i64(&b, srcs[0]); break; + case ir_unop_f2i64: + case ir_unop_d2i64: + result = nir_f2i64(&b, srcs[0]); + break; + case ir_unop_i2u64: result = nir_i2u64(&b, srcs[0]); break; + case ir_unop_u2u64: result = nir_u2u64(&b, srcs[0]); break; + case ir_unop_f2u64: + case ir_unop_d2u64: + result = nir_f2u64(&b, srcs[0]); + break; case ir_unop_i2u: case ir_unop_u2i: + case ir_unop_i642u64: + case ir_unop_u642i64: case ir_unop_bitcast_i2f: case ir_unop_bitcast_f2i: case ir_unop_bitcast_u2f: case ir_unop_bitcast_f2u: + case ir_unop_bitcast_i642d: + case ir_unop_bitcast_d2i64: + case ir_unop_bitcast_u642d: + case 
ir_unop_bitcast_d2u64: case ir_unop_subroutine_to_int: /* no-op */ result = nir_imov(&b, srcs[0]); @@ -1552,6 +1581,14 @@ nir_visitor::visit(ir_expression *ir) case ir_unop_unpack_double_2x32: result = nir_unpack_double_2x32(&b, srcs[0]); break; + case ir_unop_pack_int_2x32: + case ir_unop_pack_uint_2x32: + result = nir_pack_int_2x32(&b, srcs[0]); + break; + case ir_unop_unpack_int_2x32: + case ir_unop_unpack_uint_2x32: + result = nir_unpack_int_2x32(&b, srcs[0]); + break; case ir_unop_bitfield_reverse: result = nir_bitfield_reverse(&b, srcs[0]); break; diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c index fa18debd850..8a967c56831 100644 --- a/src/compiler/nir/nir_lower_alu_to_scalar.c +++ b/src/compiler/nir/nir_lower_alu_to_scalar.c @@ -189,6 +189,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) } case nir_op_unpack_double_2x32: + case nir_op_unpack_int_2x32: return false; LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd); diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index 7045c953bef..2e5a665fb39 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -95,6 +95,7 @@ tuint = "uint" tfloat32 = "float32" tint32 = "int32" tuint32 = "uint32" +tint64 = "int64" tuint64 = "uint64" tfloat64 = "float64" @@ -171,14 +172,30 @@ unop_convert("d2i", tint32, tfloat64, "src0") # Double-to-integer conversion. unop_convert("d2u", tuint32, tfloat64, "src0") # Double-to-unsigned conversion. unop_convert("i2f", tfloat32, tint32, "src0") # Integer-to-float conversion. unop_convert("i2d", tfloat64, tint32, "src0") # Integer-to-double conversion. +unop_convert("i2i32", tint32, tint, "src0") # General int (int8_t, int64_t, etc.) to int32_t conversion +unop_convert("u2i32", tint32, tuint, "src0") # General uint (uint8_t, uint64_t, etc.) to int32_t conversion +unop_convert("i2u32", tuint32, tint, "src0") # General int (int8_t, int64_t, etc.) 
to uint32_t conversion +unop_convert("u2u32", tuint32, tuint, "src0") # General uint (uint8_t, uint64_t, etc.) to uint32_t conversion +unop_convert("i2i64", tint64, tint, "src0") # General int (int8_t, int32_t, etc.) to int64_t conversion +unop_convert("u2i64", tint64, tuint, "src0") # General uint (uint8_t, uint64_t, etc.) to int64_t conversion +unop_convert("f2i64", tint64, tfloat, "src0") # General float (float or double) to int64_t conversion +unop_convert("i2u64", tuint64, tint, "src0") # General int (int8_t, int64_t, etc.) to uint64_t conversion +unop_convert("u2u64", tuint64, tuint, "src0") # General uint (uint8_t, uint32_t, etc.) to uint64_t conversion +unop_convert("f2u64", tuint64, tfloat, "src0") # General float (float or double) to uint64_t conversion +unop_convert("i642f", tfloat32, tint64, "src0") # int64_t-to-float conversion. +unop_convert("i642d", tfloat64, tint64, "src0") # int64_t-to-double conversion. +unop_convert("u642f", tfloat32, tuint64, "src0") # uint64_t-to-float conversion. +unop_convert("u642d", tfloat64, tuint64, "src0") # uint64_t-to-double conversion. + # Float-to-boolean conversion unop_convert("f2b", tbool, tfloat32, "src0 != 0.0f") unop_convert("d2b", tbool, tfloat64, "src0 != 0.0") # Boolean-to-float conversion unop_convert("b2f", tfloat32, tbool, "src0 ? 1.0f : 0.0f") # Int-to-boolean conversion -unop_convert("i2b", tbool, tint32, "src0 != 0") +unop_convert("i2b", tbool, tint, "src0 != 0") unop_convert("b2i", tint32, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion +unop_convert("b2i64", tint64, tbool, "src0 ? 1 : 0") # Boolean-to-int64_t conversion. unop_convert("u2f", tfloat32, tuint32, "src0") # Unsigned-to-float conversion. unop_convert("u2d", tfloat64, tuint32, "src0") # Unsigned-to-double conversion. 
# double-to-float conversion @@ -270,9 +287,15 @@ dst.x = (src0.x << 0) | unop_horiz("pack_double_2x32", 1, tuint64, 2, tuint32, "dst.x = src0.x | ((uint64_t)src0.y << 32);") +unop_horiz("pack_int_2x32", 1, tint64, 2, tint32, + "dst.x = src0.x | ((int64_t)src0.y << 32);") + unop_horiz("unpack_double_2x32", 2, tuint32, 1, tuint64, "dst.x = src0.x; dst.y = src0.x >> 32;") +unop_horiz("unpack_int_2x32", 2, tint32, 1, tint64, + "dst.x = src0.x; dst.y = src0.x >> 32;") + # Lowered floating point unpacking operations. @@ -283,6 +306,8 @@ unop_horiz("unpack_half_2x16_split_y", 1, tfloat32, 1, tuint32, unop_convert("unpack_double_2x32_split_x", tuint32, tuint64, "src0") unop_convert("unpack_double_2x32_split_y", tuint32, tuint64, "src0 >> 32") +unop_convert("unpack_int_2x32_split_x", tuint32, tuint64, "src0") +unop_convert("unpack_int_2x32_split_y", tuint32, tuint64, "src0 >> 32") # Bit operations, part of ARB_gpu_shader5. @@ -565,6 +590,9 @@ binop_horiz("pack_half_2x16_split", 1, tuint32, 1, tfloat32, 1, tfloat32, binop_convert("pack_double_2x32_split", tuint64, tuint32, "", "src0 | ((uint64_t)src1 << 32)") +binop_convert("pack_int_2x32_split", tuint64, tuint32, "", + "src0 | ((uint64_t)src1 << 32)") + # bfm implements the behavior of the first operation of the SM5 "bfi" assembly # and that of the "bfi1" i965 instruction. That is, it has undefined behavior # if either of its arguments are 32. |