nir: update opcode definitions for different bit sizes

Some opcodes need explicit bitsizes, and sometimes we need to use the double version when constant folding. v2: fix output type for u2f (Iago) v3: do not change vecN opcodes to be float. The next commit will add infrastructure to enable 64-bit integer constant folding so this is isn't really necessary. Also, that created problems with source modifiers in some cases (Iago) v4 (Jason): - do not change bcsel to work in terms of floats - leave ldexp generic Squashed changes to handle different bit sizes when constant folding since otherwise we would break the build. v2: - Use the bit-size information from the opcode information if defined (Iago) - Use helpers to get type size and base type of nir_alu_type enum (Sam) - Do not fallback to sized types to guess bit-size information. (Jason) Squashed changes in i965 and gallium/nir drivers to support sized types. These functions should only see sized types, but we can't make that change until we make sure that nir uses the sized versions in all the relevant places. A later commit will address this. Signed-off-by: Iago Toral Quiroga <[email protected]> Signed-off-by: Samuel Iglesias Gonsálvez <[email protected]> Reviewed-by: Jason Ekstrand <[email protected]> Reviewed-by: Samuel Iglesias Gonsálvez <[email protected]> Reviewed-by: Iago Toral Quiroga <[email protected]>
author: Connor Abbott <[email protected]> 2015-08-14 10:45:06 -0700
committer: Samuel Iglesias Gonsálvez <[email protected]> 2016-03-17 11:16:33 +0100
commit: 9076c4e289de0debf1fb2a7237bdeb9c11002347 (patch)
tree: 75e555d2db2023457a9b9c521f396ee88ce5f888 /src/compiler/nir/nir_opcodes.py
parent: 6700d7e423bb2d7c5f0b46740bd92b5e65679eaf (diff)
1 files changed, 71 insertions, 67 deletions
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index a37fe2dc060..553f924afc5 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -90,8 +90,12 @@ class Opcode(object):
 # helper variables for strings
 tfloat = "float"
 tint = "int"
-tbool = "bool"
+tbool = "bool32"
 tuint = "uint"
+tfloat32 = "float32"
+tint32 = "int32"
+tuint32 = "uint32"
+tfloat64 = "float64"
 
 commutative = "commutative "
 associative = "associative "
@@ -155,56 +159,56 @@ unop("frsq", tfloat, "1.0f / sqrtf(src0)")
 unop("fsqrt", tfloat, "sqrtf(src0)")
 unop("fexp2", tfloat, "exp2f(src0)")
 unop("flog2", tfloat, "log2f(src0)")
-unop_convert("f2i", tint, tfloat, "src0") # Float-to-integer conversion.
-unop_convert("f2u", tuint, tfloat, "src0") # Float-to-unsigned conversion
-unop_convert("i2f", tfloat, tint, "src0") # Integer-to-float conversion.
+unop_convert("f2i", tint32, tfloat32, "src0") # Float-to-integer conversion.
+unop_convert("f2u", tuint32, tfloat32, "src0") # Float-to-unsigned conversion
+unop_convert("i2f", tfloat32, tint32, "src0") # Integer-to-float conversion.
 # Float-to-boolean conversion
-unop_convert("f2b", tbool, tfloat, "src0 != 0.0f")
+unop_convert("f2b", tbool, tfloat32, "src0 != 0.0f")
 # Boolean-to-float conversion
-unop_convert("b2f", tfloat, tbool, "src0 ? 1.0f : 0.0f")
+unop_convert("b2f", tfloat32, tbool, "src0 ? 1.0f : 0.0f")
 # Int-to-boolean conversion
-unop_convert("i2b", tbool, tint, "src0 != 0")
-unop_convert("b2i", tint, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
-unop_convert("u2f", tfloat, tuint, "src0") # Unsigned-to-float conversion.
+unop_convert("i2b", tbool, tint32, "src0 != 0")
+unop_convert("b2i", tint32, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
+unop_convert("u2f", tfloat32, tuint32, "src0") # Unsigned-to-float conversion.
 
 # Unary floating-point rounding operations.
 
 
-unop("ftrunc", tfloat, "truncf(src0)")
-unop("fceil", tfloat, "ceilf(src0)")
-unop("ffloor", tfloat, "floorf(src0)")
-unop("ffract", tfloat, "src0 - floorf(src0)")
-unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
+unop("ftrunc", tfloat, "bit_size == 64 ? trunc(src0) : truncf(src0)")
+unop("fceil", tfloat, "bit_size == 64 ? ceil(src0) : ceilf(src0)")
+unop("ffloor", tfloat, "bit_size == 64 ? floor(src0) : floorf(src0)")
+unop("ffract", tfloat, "src0 - (bit_size == 64 ? floor(src0) : floorf(src0))")
+unop("fround_even", tfloat, "bit_size == 64 ? _mesa_roundeven(src0) : _mesa_roundevenf(src0)")
 
 
 # Trigonometric operations.
 
 
-unop("fsin", tfloat, "sinf(src0)")
-unop("fcos", tfloat, "cosf(src0)")
+unop("fsin", tfloat, "bit_size == 64 ? sin(src0) : sinf(src0)")
+unop("fcos", tfloat, "bit_size == 64 ? cos(src0) : cosf(src0)")
 
 
 # Partial derivatives.
 
 
-unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0.
-unop("fddy", tfloat, "0.0f")
-unop("fddx_fine", tfloat, "0.0f")
-unop("fddy_fine", tfloat, "0.0f")
-unop("fddx_coarse", tfloat, "0.0f")
-unop("fddy_coarse", tfloat, "0.0f")
+unop("fddx", tfloat, "0.0") # the derivative of a constant is 0.
+unop("fddy", tfloat, "0.0")
+unop("fddx_fine", tfloat, "0.0")
+unop("fddy_fine", tfloat, "0.0")
+unop("fddx_coarse", tfloat, "0.0")
+unop("fddy_coarse", tfloat, "0.0")
 
 
 # Floating point pack and unpack operations.
 
 def pack_2x16(fmt):
-   unop_horiz("pack_" + fmt + "_2x16", 1, tuint, 2, tfloat, """
+   unop_horiz("pack_" + fmt + "_2x16", 1, tuint32, 2, tfloat32, """
 dst.x = (uint32_t) pack_fmt_1x16(src0.x);
 dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16;
 """.replace("fmt", fmt))
 
 def pack_4x8(fmt):
-   unop_horiz("pack_" + fmt + "_4x8", 1, tuint, 4, tfloat, """
+   unop_horiz("pack_" + fmt + "_4x8", 1, tuint32, 4, tfloat32, """
 dst.x = (uint32_t) pack_fmt_1x8(src0.x);
 dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8;
 dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16;
@@ -212,13 +216,13 @@ dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24;
 """.replace("fmt", fmt))
 
 def unpack_2x16(fmt):
-   unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tuint, """
+   unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat32, 1, tuint32, """
 dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff));
 dst.y = unpack_fmt_1x16((uint16_t)(src0.x << 16));
 """.replace("fmt", fmt))
 
 def unpack_4x8(fmt):
-   unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tuint, """
+   unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat32, 1, tuint32, """
 dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff));
 dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff));
 dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff));
@@ -237,11 +241,11 @@ unpack_2x16("unorm")
 unpack_4x8("unorm")
 unpack_2x16("half")
 
-unop_horiz("pack_uvec2_to_uint", 1, tuint, 2, tuint, """
+unop_horiz("pack_uvec2_to_uint", 1, tuint32, 2, tuint32, """
 dst.x = (src0.x & 0xffff) | (src0.y >> 16);
 """)
 
-unop_horiz("pack_uvec4_to_uint", 1, tuint, 4, tuint, """
+unop_horiz("pack_uvec4_to_uint", 1, tuint32, 4, tuint32, """
 dst.x = (src0.x <<  0) |
         (src0.y <<  8) |
         (src0.z << 16) |
@@ -251,22 +255,22 @@ dst.x = (src0.x <<  0) |
 # Lowered floating point unpacking operations.
 
 
-unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tuint,
+unop_horiz("unpack_half_2x16_split_x", 1, tfloat32, 1, tuint32,
            "unpack_half_1x16((uint16_t)(src0.x & 0xffff))")
-unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tuint,
+unop_horiz("unpack_half_2x16_split_y", 1, tfloat32, 1, tuint32,
            "unpack_half_1x16((uint16_t)(src0.x >> 16))")
 
 
 # Bit operations, part of ARB_gpu_shader5.
 
 
-unop("bitfield_reverse", tuint, """
+unop("bitfield_reverse", tuint32, """
 /* we're not winning any awards for speed here, but that's ok */
 dst = 0;
 for (unsigned bit = 0; bit < 32; bit++)
    dst |= ((src0 >> bit) & 1) << (31 - bit);
 """)
-unop("bit_count", tuint, """
+unop("bit_count", tuint32, """
 dst = 0;
 for (unsigned bit = 0; bit < 32; bit++) {
    if ((src0 >> bit) & 1)
@@ -274,7 +278,7 @@ for (unsigned bit = 0; bit < 32; bit++) {
 }
 """)
 
-unop_convert("ufind_msb", tint, tuint, """
+unop_convert("ufind_msb", tint32, tuint32, """
 dst = -1;
 for (int bit = 31; bit > 0; bit--) {
    if ((src0 >> bit) & 1) {
@@ -284,7 +288,7 @@ for (int bit = 31; bit > 0; bit--) {
 }
 """)
 
-unop("ifind_msb", tint, """
+unop("ifind_msb", tint32, """
 dst = -1;
 for (int bit = 31; bit >= 0; bit--) {
    /* If src0 < 0, we're looking for the first 0 bit.
@@ -298,7 +302,7 @@ for (int bit = 31; bit >= 0; bit--) {
 }
 """)
 
-unop("find_lsb", tint, """
+unop("find_lsb", tint32, """
 dst = -1;
 for (unsigned bit = 0; bit < 32; bit++) {
    if ((src0 >> bit) & 1) {
@@ -358,10 +362,10 @@ binop("fmul", tfloat, commutative + associative, "src0 * src1")
 # low 32-bits of signed/unsigned integer multiply
 binop("imul", tint, commutative + associative, "src0 * src1")
 # high 32-bits of signed integer multiply
-binop("imul_high", tint, commutative,
+binop("imul_high", tint32, commutative,
       "(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)")
 # high 32-bits of unsigned integer multiply
-binop("umul_high", tuint, commutative,
+binop("umul_high", tuint32, commutative,
       "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")
 
 binop("fdiv", tfloat, "", "src0 / src1")
@@ -412,18 +416,18 @@ binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}",
 
 # non-integer-aware GLSL-style comparisons that return 0.0 or 1.0
 
-binop_reduce("fall_equal",  1, tfloat, tfloat, "{src0} == {src1}",
+binop_reduce("fall_equal",  1, tfloat32, tfloat32, "{src0} == {src1}",
              "{src0} && {src1}", "{src} ? 1.0f : 0.0f")
-binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}",
+binop_reduce("fany_nequal", 1, tfloat32, tfloat32, "{src0} != {src1}",
              "{src0} || {src1}", "{src} ? 1.0f : 0.0f")
 
 # These comparisons for integer-less hardware return 1.0 and 0.0 for true
 # and false respectively
 
-binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
-binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
-binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
-binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
+binop("slt", tfloat32, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
+binop("sge", tfloat32, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
+binop("seq", tfloat32, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
+binop("sne", tfloat32, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
 
 
 binop("ishl", tint, "", "src0 << src1")
@@ -446,11 +450,11 @@ binop("ixor", tuint, commutative + associative, "src0 ^ src1")
 # These use (src != 0.0) for testing the truth of the input, and output 1.0
 # for true and 0.0 for false
 
-binop("fand", tfloat, commutative,
+binop("fand", tfloat32, commutative,
       "((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f")
-binop("for", tfloat, commutative,
+binop("for", tfloat32, commutative,
       "((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f")
-binop("fxor", tfloat, commutative,
+binop("fxor", tfloat32, commutative,
       "(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f")
 
 binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
@@ -472,7 +476,7 @@ binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
 binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0")
 
 # Saturated vector add for 4 8bit ints.
-binop("usadd_4x8", tint, commutative + associative, """
+binop("usadd_4x8", tint32, commutative + associative, """
 dst = 0;
 for (int i = 0; i < 32; i += 8) {
    dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i;
@@ -480,7 +484,7 @@ for (int i = 0; i < 32; i += 8) {
 """)
 
 # Saturated vector subtract for 4 8bit ints.
-binop("ussub_4x8", tint, "", """
+binop("ussub_4x8", tint32, "", """
 dst = 0;
 for (int i = 0; i < 32; i += 8) {
    int src0_chan = (src0 >> i) & 0xff;
@@ -491,7 +495,7 @@ for (int i = 0; i < 32; i += 8) {
 """)
 
 # vector min for 4 8bit ints.
-binop("umin_4x8", tint, commutative + associative, """
+binop("umin_4x8", tint32, commutative + associative, """
 dst = 0;
 for (int i = 0; i < 32; i += 8) {
    dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
@@ -499,7 +503,7 @@ for (int i = 0; i < 32; i += 8) {
 """)
 
 # vector max for 4 8bit ints.
-binop("umax_4x8", tint, commutative + associative, """
+binop("umax_4x8", tint32, commutative + associative, """
 dst = 0;
 for (int i = 0; i < 32; i += 8) {
    dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
@@ -507,7 +511,7 @@ for (int i = 0; i < 32; i += 8) {
 """)
 
 # unorm multiply: (a * b) / 255.
-binop("umul_unorm_4x8", tint, commutative + associative, """
+binop("umul_unorm_4x8", tint32, commutative + associative, """
 dst = 0;
 for (int i = 0; i < 32; i += 8) {
    int src0_chan = (src0 >> i) & 0xff;
@@ -516,15 +520,15 @@ for (int i = 0; i < 32; i += 8) {
 }
 """)
 
-binop("fpow", tfloat, "", "powf(src0, src1)")
+binop("fpow", tfloat, "", "bit_size == 64 ? powf(src0, src1) : pow(src0, src1)")
 
-binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat,
+binop_horiz("pack_half_2x16_split", 1, tuint32, 1, tfloat32, 1, tfloat32,
             "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
 
 # bfm implements the behavior of the first operation of the SM5 "bfi" assembly
 # and that of the "bfi1" i965 instruction. That is, it has undefined behavior
 # if either of its arguments are 32.
-binop_convert("bfm", tuint, tint, "", """
+binop_convert("bfm", tuint32, tint32, "", """
 int bits = src0, offset = src1;
 if (offset < 0 || bits < 0 || offset > 31 || bits > 31 || offset + bits > 32)
    dst = 0; /* undefined */
@@ -533,7 +537,7 @@ else
 """)
 
 opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """
-dst = ldexpf(src0, src1);
+dst = (bit_size == 64) ? ldexp(src0, src1) : ldexpf(src0, src1);
 /* flush denormals to zero. */
 if (!isnormal(dst))
    dst = copysignf(0.0f, src0);
@@ -573,12 +577,12 @@ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
 # bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).
 
 
-triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2")
+triop("fcsel", tfloat32, "(src0 != 0.0f) ? src1 : src2")
 opcode("bcsel", 0, tuint, [0, 0, 0],
       [tbool, tuint, tuint], "", "src0 ? src1 : src2")
 
 # SM5 bfi assembly
-triop("bfi", tuint, """
+triop("bfi", tuint32, """
 unsigned mask = src0, insert = src1, base = src2;
 if (mask == 0) {
    dst = base;
@@ -593,8 +597,8 @@ if (mask == 0) {
 """)
 
 # SM5 ubfe/ibfe assembly
-opcode("ubfe", 0, tuint,
-       [0, 0, 0], [tuint, tint, tint], "", """
+opcode("ubfe", 0, tuint32,
+       [0, 0, 0], [tuint32, tint32, tint32], "", """
 unsigned base = src0;
 int offset = src1, bits = src2;
 if (bits == 0) {
@@ -607,8 +611,8 @@ if (bits == 0) {
    dst = base >> offset;
 }
 """)
-opcode("ibfe", 0, tint,
-       [0, 0, 0], [tint, tint, tint], "", """
+opcode("ibfe", 0, tint32,
+       [0, 0, 0], [tint32, tint32, tint32], "", """
 int base = src0;
 int offset = src1, bits = src2;
 if (bits == 0) {
@@ -623,8 +627,8 @@ if (bits == 0) {
 """)
 
 # GLSL bitfieldExtract()
-opcode("ubitfield_extract", 0, tuint,
-       [0, 0, 0], [tuint, tint, tint], "", """
+opcode("ubitfield_extract", 0, tuint32,
+       [0, 0, 0], [tuint32, tint32, tint32], "", """
 unsigned base = src0;
 int offset = src1, bits = src2;
 if (bits == 0) {
@@ -635,8 +639,8 @@ if (bits == 0) {
    dst = (base >> offset) & ((1ull << bits) - 1);
 }
 """)
-opcode("ibitfield_extract", 0, tint,
-       [0, 0, 0], [tint, tint, tint], "", """
+opcode("ibitfield_extract", 0, tint32,
+       [0, 0, 0], [tint32, tint32, tint32], "", """
 int base = src0;
 int offset = src1, bits = src2;
 if (bits == 0) {
@@ -663,8 +667,8 @@ def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
           [tuint, tuint, tuint, tuint],
           "", const_expr)
 
-opcode("bitfield_insert", 0, tuint, [0, 0, 0, 0],
-       [tuint, tuint, tint, tint], "", """
+opcode("bitfield_insert", 0, tuint32, [0, 0, 0, 0],
+       [tuint32, tuint32, tint32, tint32], "", """
 unsigned base = src0, insert = src1;
 int offset = src2, bits = src3;
 if (bits == 0) {
author	Connor Abbott <[email protected]>	2015-08-14 10:45:06 -0700
committer	Samuel Iglesias Gonsálvez <[email protected]>	2016-03-17 11:16:33 +0100
commit	9076c4e289de0debf1fb2a7237bdeb9c11002347 (patch)
tree	75e555d2db2023457a9b9c521f396ee88ce5f888 /src/compiler/nir/nir_opcodes.py
parent	6700d7e423bb2d7c5f0b46740bd92b5e65679eaf (diff)