summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIan Romanick <[email protected]>2016-09-01 15:21:04 -0700
committerIan Romanick <[email protected]>2017-01-20 15:41:23 -0800
commit3460d05a718f3859a77fe100f3972095d194be26 (patch)
tree0367cda6b97e8afd4ef5a23a1bf88381e9e356fd
parent3ca0029a0dc7b2ed2c61de72ef16e7ad1831c101 (diff)
nir: Add 64-bit integer support for conversions and bitcasts
v2 (idr): "cut them down later" => Remove ir_unop_b2u64 and ir_unop_u642b. Handle these with extra i2u or u2i casts just like uint(bool) and bool(uint) conversions are done. v3 (idr): Make the "from" type in a cast unsized. This reduces the number of required cast operations at the expense of slightly more complex code. However, this will be a dramatic improvement when other sized integer types are added. Suggested by Connor. Signed-off-by: Ian Romanick <[email protected]> Reviewed-by: Connor Abbott <[email protected]>
-rw-r--r--src/compiler/glsl/glsl_to_nir.cpp37
-rw-r--r--src/compiler/nir/nir_lower_alu_to_scalar.c1
-rw-r--r--src/compiler/nir/nir_opcodes.py30
3 files changed, 67 insertions, 1 deletions
diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp
index f4c3d01e723..7a1c1f4c8a2 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -1480,6 +1480,7 @@ nir_visitor::visit(ir_expression *ir)
case ir_unop_f2b: result = nir_f2b(&b, srcs[0]); break;
case ir_unop_i2b: result = nir_i2b(&b, srcs[0]); break;
case ir_unop_b2i: result = nir_b2i(&b, srcs[0]); break;
+ case ir_unop_b2i64:result = nir_b2i64(&b, srcs[0]); break;
case ir_unop_d2f: result = nir_d2f(&b, srcs[0]); break;
case ir_unop_f2d: result = nir_f2d(&b, srcs[0]); break;
case ir_unop_d2i: result = nir_d2i(&b, srcs[0]); break;
@@ -1493,12 +1494,40 @@ nir_visitor::visit(ir_expression *ir)
assert(supports_ints);
result = nir_u2d(&b, srcs[0]);
break;
+ case ir_unop_i642i: result = nir_i2i32(&b, srcs[0]); break;
+ case ir_unop_i642u: result = nir_i2u32(&b, srcs[0]); break;
+ case ir_unop_i642f: result = nir_i642f(&b, srcs[0]); break;
+ case ir_unop_i642d: result = nir_i642d(&b, srcs[0]); break;
+
+ case ir_unop_u642i: result = nir_u2i32(&b, srcs[0]); break;
+ case ir_unop_u642u: result = nir_u2u32(&b, srcs[0]); break;
+ case ir_unop_u642f: result = nir_u642f(&b, srcs[0]); break;
+ case ir_unop_u642d: result = nir_u642d(&b, srcs[0]); break;
+
+ case ir_unop_i2i64: result = nir_i2i64(&b, srcs[0]); break;
+ case ir_unop_u2i64: result = nir_u2i64(&b, srcs[0]); break;
+ case ir_unop_f2i64:
+ case ir_unop_d2i64:
+ result = nir_f2i64(&b, srcs[0]);
+ break;
+ case ir_unop_i2u64: result = nir_i2u64(&b, srcs[0]); break;
+ case ir_unop_u2u64: result = nir_u2u64(&b, srcs[0]); break;
+ case ir_unop_f2u64:
+ case ir_unop_d2u64:
+ result = nir_f2u64(&b, srcs[0]);
+ break;
case ir_unop_i2u:
case ir_unop_u2i:
+ case ir_unop_i642u64:
+ case ir_unop_u642i64:
case ir_unop_bitcast_i2f:
case ir_unop_bitcast_f2i:
case ir_unop_bitcast_u2f:
case ir_unop_bitcast_f2u:
+ case ir_unop_bitcast_i642d:
+ case ir_unop_bitcast_d2i64:
+ case ir_unop_bitcast_u642d:
+ case ir_unop_bitcast_d2u64:
case ir_unop_subroutine_to_int:
/* no-op */
result = nir_imov(&b, srcs[0]);
@@ -1552,6 +1581,14 @@ nir_visitor::visit(ir_expression *ir)
case ir_unop_unpack_double_2x32:
result = nir_unpack_double_2x32(&b, srcs[0]);
break;
+ case ir_unop_pack_int_2x32:
+ case ir_unop_pack_uint_2x32:
+ result = nir_pack_int_2x32(&b, srcs[0]);
+ break;
+ case ir_unop_unpack_int_2x32:
+ case ir_unop_unpack_uint_2x32:
+ result = nir_unpack_int_2x32(&b, srcs[0]);
+ break;
case ir_unop_bitfield_reverse:
result = nir_bitfield_reverse(&b, srcs[0]);
break;
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index fa18debd850..8a967c56831 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -189,6 +189,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
}
case nir_op_unpack_double_2x32:
+ case nir_op_unpack_int_2x32:
return false;
LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 7045c953bef..2e5a665fb39 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -95,6 +95,7 @@ tuint = "uint"
tfloat32 = "float32"
tint32 = "int32"
tuint32 = "uint32"
+tint64 = "int64"
tuint64 = "uint64"
tfloat64 = "float64"
@@ -171,14 +172,30 @@ unop_convert("d2i", tint32, tfloat64, "src0") # Double-to-integer conversion.
unop_convert("d2u", tuint32, tfloat64, "src0") # Double-to-unsigned conversion.
unop_convert("i2f", tfloat32, tint32, "src0") # Integer-to-float conversion.
unop_convert("i2d", tfloat64, tint32, "src0") # Integer-to-double conversion.
+unop_convert("i2i32", tint32, tint, "src0") # General int (int8_t, int64_t, etc.) to int32_t conversion
+unop_convert("u2i32", tint32, tuint, "src0") # General uint (uint8_t, uint64_t, etc.) to int32_t conversion
+unop_convert("i2u32", tuint32, tint, "src0") # General int (int8_t, int64_t, etc.) to uint32_t conversion
+unop_convert("u2u32", tuint32, tuint, "src0") # General uint (uint8_t, uint64_t, etc.) to uint32_t conversion
+unop_convert("i2i64", tint64, tint, "src0") # General int (int8_t, int32_t, etc.) to int64_t conversion
+unop_convert("u2i64", tint64, tuint, "src0") # General uint (uint8_t, uint64_t, etc.) to int64_t conversion
+unop_convert("f2i64", tint64, tfloat, "src0") # General float (float or double) to int64_t conversion
+unop_convert("i2u64", tuint64, tint, "src0") # General int (int8_t, int64_t, etc.) to uint64_t conversion
+unop_convert("u2u64", tuint64, tuint, "src0") # General uint (uint8_t, uint32_t, etc.) to uint64_t conversion
+unop_convert("f2u64", tuint64, tfloat, "src0") # General float (float or double) to uint64_t conversion
+unop_convert("i642f", tfloat32, tint64, "src0") # int64_t-to-float conversion.
+unop_convert("i642d", tfloat64, tint64, "src0") # int64_t-to-double conversion.
+unop_convert("u642f", tfloat32, tuint64, "src0") # uint64_t-to-float conversion.
+unop_convert("u642d", tfloat64, tuint64, "src0") # uint64_t-to-double conversion.
+
# Float-to-boolean conversion
unop_convert("f2b", tbool, tfloat32, "src0 != 0.0f")
unop_convert("d2b", tbool, tfloat64, "src0 != 0.0")
# Boolean-to-float conversion
unop_convert("b2f", tfloat32, tbool, "src0 ? 1.0f : 0.0f")
# Int-to-boolean conversion
-unop_convert("i2b", tbool, tint32, "src0 != 0")
+unop_convert("i2b", tbool, tint, "src0 != 0")
unop_convert("b2i", tint32, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
+unop_convert("b2i64", tint64, tbool, "src0 ? 1 : 0") # Boolean-to-int64_t conversion.
unop_convert("u2f", tfloat32, tuint32, "src0") # Unsigned-to-float conversion.
unop_convert("u2d", tfloat64, tuint32, "src0") # Unsigned-to-double conversion.
# double-to-float conversion
@@ -270,9 +287,15 @@ dst.x = (src0.x << 0) |
unop_horiz("pack_double_2x32", 1, tuint64, 2, tuint32,
"dst.x = src0.x | ((uint64_t)src0.y << 32);")
+unop_horiz("pack_int_2x32", 1, tint64, 2, tint32,
+ "dst.x = src0.x | ((int64_t)src0.y << 32);")
+
unop_horiz("unpack_double_2x32", 2, tuint32, 1, tuint64,
"dst.x = src0.x; dst.y = src0.x >> 32;")
+unop_horiz("unpack_int_2x32", 2, tint32, 1, tint64,
+ "dst.x = src0.x; dst.y = src0.x >> 32;")
+
# Lowered floating point unpacking operations.
@@ -283,6 +306,8 @@ unop_horiz("unpack_half_2x16_split_y", 1, tfloat32, 1, tuint32,
unop_convert("unpack_double_2x32_split_x", tuint32, tuint64, "src0")
unop_convert("unpack_double_2x32_split_y", tuint32, tuint64, "src0 >> 32")
+unop_convert("unpack_int_2x32_split_x", tuint32, tuint64, "src0")
+unop_convert("unpack_int_2x32_split_y", tuint32, tuint64, "src0 >> 32")
# Bit operations, part of ARB_gpu_shader5.
@@ -565,6 +590,9 @@ binop_horiz("pack_half_2x16_split", 1, tuint32, 1, tfloat32, 1, tfloat32,
binop_convert("pack_double_2x32_split", tuint64, tuint32, "",
"src0 | ((uint64_t)src1 << 32)")
+binop_convert("pack_int_2x32_split", tuint64, tuint32, "",
+ "src0 | ((uint64_t)src1 << 32)")
+
# bfm implements the behavior of the first operation of the SM5 "bfi" assembly
# and that of the "bfi1" i965 instruction. That is, it has undefined behavior
# if either of its arguments are 32.