diff options
author | Jason Ekstrand <[email protected]> | 2017-03-07 19:54:37 -0800 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2017-03-14 07:36:40 -0700 |
commit | 762a6333f21fd8606f69db6060027c4522d46678 (patch) | |
tree | f77c695fa16a5d869175773229d0195c22060c93 /src/compiler/nir | |
parent | 7107b321557e421e33fe92221133cf4a08eb7c6c (diff) |
nir: Rework conversion opcodes
The NIR story on conversion opcodes is a mess. We have had far too many
of them, their naming is inconsistent, and which ones carry explicit sizes
has been more or less random. This commit reorganizes them and makes them
all consistent:
- All non-bool conversion opcodes now carry the explicit bit size of the
destination and are named <src_type>2<dst_type><size> (e.g. i2f32, f2f64).
- Integer <-> integer conversion opcodes now only come in i2i and u2u
forms (i2u and u2i have been removed) since the only difference
between the different integer conversions is whether or not they
sign-extend when up-converting.
- Boolean conversion opcodes carry no size suffix (bool has only one bit
size, so the size is always inferred from the other type) and are named
<src_type>2<dst_type>.
Making things consistent also allows nir_type_conversion_op to be moved
to nir_opcodes.c and auto-generated using mako. This will make adding
int8, int16, and float16 versions much easier when the time comes.
Reviewed-by: Eric Anholt <[email protected]>
Diffstat (limited to 'src/compiler/nir')
-rw-r--r-- | src/compiler/nir/nir.c | 122 | ||||
-rw-r--r-- | src/compiler/nir/nir_builder.h | 4 | ||||
-rw-r--r-- | src/compiler/nir/nir_lower_double_ops.c | 4 | ||||
-rw-r--r-- | src/compiler/nir/nir_lower_idiv.c | 16 | ||||
-rw-r--r-- | src/compiler/nir/nir_lower_tex.c | 4 | ||||
-rw-r--r-- | src/compiler/nir/nir_opcodes.py | 54 | ||||
-rw-r--r-- | src/compiler/nir/nir_opcodes_c.py | 66 | ||||
-rw-r--r-- | src/compiler/nir/nir_opt_algebraic.py | 40 |
8 files changed, 121 insertions, 189 deletions
diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c index 37fd9cb5c56..937b6300624 100644 --- a/src/compiler/nir/nir.c +++ b/src/compiler/nir/nir.c @@ -1958,125 +1958,3 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) unreachable("intrinsic doesn't produce a system value"); } } - -nir_op -nir_type_conversion_op(nir_alu_type src, nir_alu_type dst) -{ - nir_alu_type src_base_type = (nir_alu_type) nir_alu_type_get_base_type(src); - nir_alu_type dst_base_type = (nir_alu_type) nir_alu_type_get_base_type(dst); - unsigned src_bitsize = nir_alu_type_get_type_size(src); - unsigned dst_bitsize = nir_alu_type_get_type_size(dst); - - if (src_bitsize == dst_bitsize) { - switch (src_base_type) { - case nir_type_int: - case nir_type_uint: - if (dst_base_type == nir_type_uint || dst_base_type == nir_type_int) - return nir_op_imov; - break; - case nir_type_float: - if (dst_base_type == nir_type_float) - return nir_op_fmov; - break; - case nir_type_bool: - if (dst_base_type == nir_type_bool) - return nir_op_imov; - break; - default: - unreachable("Invalid conversion"); - } - } - - switch (src_base_type) { - case nir_type_int: - switch (dst_base_type) { - case nir_type_int: - assert(src_bitsize != dst_bitsize); - return (dst_bitsize == 32) ? nir_op_i2i32 : nir_op_i2i64; - case nir_type_uint: - assert(src_bitsize != dst_bitsize); - return (dst_bitsize == 32) ? nir_op_i2u32 : nir_op_i2u64; - case nir_type_float: - switch (src_bitsize) { - case 32: - return (dst_bitsize == 32) ? nir_op_i2f : nir_op_i2d; - case 64: - return (dst_bitsize == 32) ? nir_op_i642f : nir_op_i642d; - default: - unreachable("Invalid conversion"); - } - case nir_type_bool: - return (src_bitsize == 32) ? nir_op_i2b : nir_op_i642b; - default: - unreachable("Invalid conversion"); - } - - case nir_type_uint: - switch (dst_base_type) { - case nir_type_int: - assert(src_bitsize != dst_bitsize); - return (dst_bitsize == 32) ? 
nir_op_u2i32 : nir_op_u2i64; - case nir_type_uint: - assert(src_bitsize != dst_bitsize); - return (dst_bitsize == 32) ? nir_op_u2u32 : nir_op_u2u64; - case nir_type_float: - switch (src_bitsize) { - case 32: - return (dst_bitsize == 32) ? nir_op_u2f : nir_op_u2d; - case 64: - return (dst_bitsize == 32) ? nir_op_u642f : nir_op_u642d; - default: - unreachable("Invalid conversion"); - } - case nir_type_bool: - return (src_bitsize == 32) ? nir_op_i2b : nir_op_i642b; - default: - unreachable("Invalid conversion"); - } - - case nir_type_float: - switch (dst_base_type) { - case nir_type_int: - switch (src_bitsize) { - case 32: - return (dst_bitsize == 32) ? nir_op_f2i : nir_op_f2i64; - case 64: - return (dst_bitsize == 32) ? nir_op_d2i : nir_op_f2i64; - default: - unreachable("Invalid conversion"); - } - case nir_type_uint: - switch (src_bitsize) { - case 32: - return (dst_bitsize == 32) ? nir_op_f2u : nir_op_f2u64; - case 64: - return (dst_bitsize == 32) ? nir_op_d2u : nir_op_f2u64; - default: - unreachable("Invalid conversion"); - } - case nir_type_float: - assert(src_bitsize != dst_bitsize); - return (dst_bitsize == 32) ? nir_op_d2f : nir_op_f2d; - case nir_type_bool: - return (src_bitsize == 32) ? nir_op_f2b : nir_op_d2b; - default: - unreachable("Invalid conversion"); - } - - case nir_type_bool: - switch (dst_base_type) { - case nir_type_int: - case nir_type_uint: - return (dst_bitsize == 32) ? nir_op_b2i : nir_op_b2i64; - case nir_type_float: - /* GLSL just emits f2d(b2f(x)) for b2d */ - assert(dst_bitsize == 32); - return nir_op_b2f; - default: - unreachable("Invalid conversion"); - } - - default: - unreachable("Invalid conversion"); - } -} diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h index 1dc56ebf53d..a4f15b6d335 100644 --- a/src/compiler/nir/nir_builder.h +++ b/src/compiler/nir/nir_builder.h @@ -328,6 +328,10 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0, } } + /* When in doubt, assume 32. 
*/ + if (bit_size == 0) + bit_size = 32; + /* Make sure we don't swizzle from outside of our source vector (like if a * scalar value was passed into a multiply with a vector). */ diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c index ad9631327b4..00eeb89b1bd 100644 --- a/src/compiler/nir/nir_lower_double_ops.c +++ b/src/compiler/nir/nir_lower_double_ops.c @@ -116,7 +116,7 @@ lower_rcp(nir_builder *b, nir_ssa_def *src) /* cast to float, do an rcp, and then cast back to get an approximate * result */ - nir_ssa_def *ra = nir_f2d(b, nir_frcp(b, nir_d2f(b, src_norm))); + nir_ssa_def *ra = nir_f2f64(b, nir_frcp(b, nir_f2f32(b, src_norm))); /* Fixup the exponent of the result - note that we check if this is too * small below. @@ -180,7 +180,7 @@ lower_sqrt_rsq(nir_builder *b, nir_ssa_def *src, bool sqrt) nir_iadd(b, nir_imm_int(b, 1023), even)); - nir_ssa_def *ra = nir_f2d(b, nir_frsq(b, nir_d2f(b, src_norm))); + nir_ssa_def *ra = nir_f2f64(b, nir_frsq(b, nir_f2f32(b, src_norm))); nir_ssa_def *new_exp = nir_isub(b, get_exponent(b, ra), half); ra = set_exponent(b, ra, new_exp); diff --git a/src/compiler/nir/nir_lower_idiv.c b/src/compiler/nir/nir_lower_idiv.c index 6726b718aaa..194ca5a75a8 100644 --- a/src/compiler/nir/nir_lower_idiv.c +++ b/src/compiler/nir/nir_lower_idiv.c @@ -56,15 +56,15 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu) denom = nir_ssa_for_alu_src(bld, alu, 1); if (is_signed) { - af = nir_i2f(bld, numer); - bf = nir_i2f(bld, denom); + af = nir_i2f32(bld, numer); + bf = nir_i2f32(bld, denom); af = nir_fabs(bld, af); bf = nir_fabs(bld, bf); a = nir_iabs(bld, numer); b = nir_iabs(bld, denom); } else { - af = nir_u2f(bld, numer); - bf = nir_u2f(bld, denom); + af = nir_u2f32(bld, numer); + bf = nir_u2f32(bld, denom); a = numer; b = denom; } @@ -75,17 +75,17 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu) q = nir_fmul(bld, af, bf); if (is_signed) { - q = nir_f2i(bld, q); + q = nir_f2i32(bld, q); } 
else { - q = nir_f2u(bld, q); + q = nir_f2u32(bld, q); } /* get error of first result: */ r = nir_imul(bld, q, b); r = nir_isub(bld, a, r); - r = nir_u2f(bld, r); + r = nir_u2f32(bld, r); r = nir_fmul(bld, r, bf); - r = nir_f2u(bld, r); + r = nir_f2u32(bld, r); /* add quotients: */ q = nir_iadd(bld, q, r); diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c index 213406aaa98..70054679955 100644 --- a/src/compiler/nir/nir_lower_tex.c +++ b/src/compiler/nir/nir_lower_tex.c @@ -121,7 +121,7 @@ lower_offset(nir_builder *b, nir_tex_instr *tex) nir_ssa_def *offset_coord; if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) { assert(tex->sampler_dim == GLSL_SAMPLER_DIM_RECT); - offset_coord = nir_fadd(b, coord, nir_i2f(b, offset)); + offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset)); } else { offset_coord = nir_iadd(b, coord, offset); } @@ -176,7 +176,7 @@ get_texture_size(nir_builder *b, nir_tex_instr *tex) nir_tex_instr_dest_size(txs), 32, NULL); nir_builder_instr_insert(b, &txs->instr); - return nir_i2f(b, &txs->dest.ssa); + return nir_i2f32(b, &txs->dest.ssa); } static void diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index 8cad74832a6..52868d5f5a4 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -165,42 +165,26 @@ unop("frsq", tfloat, "bit_size == 64 ? 1.0 / sqrt(src0) : 1.0f / sqrtf(src0)") unop("fsqrt", tfloat, "bit_size == 64 ? sqrt(src0) : sqrtf(src0)") unop("fexp2", tfloat, "exp2f(src0)") unop("flog2", tfloat, "log2f(src0)") -unop_convert("f2i", tint32, tfloat32, "src0") # Float-to-integer conversion. -unop_convert("f2u", tuint32, tfloat32, "src0") # Float-to-unsigned conversion -unop_convert("d2i", tint32, tfloat64, "src0") # Double-to-integer conversion. -unop_convert("d2u", tuint32, tfloat64, "src0") # Double-to-unsigned conversion. -unop_convert("i2f", tfloat32, tint32, "src0") # Integer-to-float conversion. 
-unop_convert("i2d", tfloat64, tint32, "src0") # Integer-to-double conversion. -unop_convert("i2i32", tint32, tint, "src0") # General int (int8_t, int64_t, etc.) to int32_t conversion -unop_convert("u2i32", tint32, tuint, "src0") # General uint (uint8_t, uint64_t, etc.) to int32_t conversion -unop_convert("i2u32", tuint32, tint, "src0") # General int (int8_t, int64_t, etc.) to uint32_t conversion -unop_convert("u2u32", tuint32, tuint, "src0") # General uint (uint8_t, uint64_t, etc.) to uint32_t conversion -unop_convert("i2i64", tint64, tint, "src0") # General int (int8_t, int32_t, etc.) to int64_t conversion -unop_convert("u2i64", tint64, tuint, "src0") # General uint (uint8_t, uint64_t, etc.) to int64_t conversion -unop_convert("f2i64", tint64, tfloat, "src0") # General float (float or double) to int64_t conversion -unop_convert("i2u64", tuint64, tint, "src0") # General int (int8_t, int64_t, etc.) to uint64_t conversion -unop_convert("u2u64", tuint64, tuint, "src0") # General uint (uint8_t, uint32_t, etc.) to uint64_t conversion -unop_convert("f2u64", tuint64, tfloat, "src0") # General float (float or double) to uint64_t conversion -unop_convert("i642f", tfloat32, tint64, "src0") # int64_t-to-float conversion. -unop_convert("i642b", tbool, tint64, "src0") # int64_t-to-bool conversion. -unop_convert("i642d", tfloat64, tint64, "src0") # int64_t-to-double conversion. -unop_convert("u642f", tfloat32, tuint64, "src0") # uint64_t-to-float conversion. -unop_convert("u642d", tfloat64, tuint64, "src0") # uint64_t-to-double conversion. - -# Float-to-boolean conversion -unop_convert("f2b", tbool, tfloat32, "src0 != 0.0f") -unop_convert("d2b", tbool, tfloat64, "src0 != 0.0") -# Boolean-to-float conversion -unop_convert("b2f", tfloat32, tbool, "src0 ? 
1.0f : 0.0f") -# Int-to-boolean conversion + +# Generate all of the numeric conversion opcodes +for src_t in [tint, tuint, tfloat]: + if src_t in (tint, tuint): + dst_types = [tfloat, src_t] + elif src_t == tfloat: + dst_types = [tint, tuint, tfloat] + + for dst_t in dst_types: + for bit_size in [32, 64]: + unop_convert("{}2{}{}".format(src_t[0], dst_t[0], bit_size), + dst_t + str(bit_size), src_t, "src0") + +# We'll hand-code the to/from bool conversion opcodes. Because bool doesn't +# have multiple bit-sizes, we can always infer the size from the other type. +unop_convert("f2b", tbool, tfloat, "src0 != 0.0") unop_convert("i2b", tbool, tint, "src0 != 0") -unop_convert("b2i", tint32, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion -unop_convert("b2i64", tint64, tbool, "src0 ? 1 : 0") # Boolean-to-int64_t conversion. -unop_convert("u2f", tfloat32, tuint32, "src0") # Unsigned-to-float conversion. -unop_convert("u2d", tfloat64, tuint32, "src0") # Unsigned-to-double conversion. -# double-to-float conversion -unop_convert("d2f", tfloat32, tfloat64, "src0") # Double to single precision -unop_convert("f2d", tfloat64, tfloat32, "src0") # Single to double precision +unop_convert("b2f", tfloat, tbool, "src0 ? 1.0 : 0.0") +unop_convert("b2i", tint, tbool, "src0 ? 1 : 0") + # Unary floating-point rounding operations. 
diff --git a/src/compiler/nir/nir_opcodes_c.py b/src/compiler/nir/nir_opcodes_c.py index 5f8bdc12a07..c66f3bc7ad4 100644 --- a/src/compiler/nir/nir_opcodes_c.py +++ b/src/compiler/nir/nir_opcodes_c.py @@ -29,6 +29,72 @@ from mako.template import Template template = Template(""" #include "nir.h" +nir_op +nir_type_conversion_op(nir_alu_type src, nir_alu_type dst) +{ + nir_alu_type src_base = (nir_alu_type) nir_alu_type_get_base_type(src); + nir_alu_type dst_base = (nir_alu_type) nir_alu_type_get_base_type(dst); + unsigned src_bit_size = nir_alu_type_get_type_size(src); + unsigned dst_bit_size = nir_alu_type_get_type_size(dst); + + if (src == dst && src_base == nir_type_float) { + return nir_op_fmov; + } else if ((src_base == nir_type_int || src_base == nir_type_uint) && + (dst_base == nir_type_int || dst_base == nir_type_uint) && + src_bit_size == dst_bit_size) { + /* Integer <-> integer conversions with the same bit-size on both + * ends are just no-op moves. + */ + return nir_op_imov; + } + + switch (src_base) { +% for src_t in ['int', 'uint', 'float']: + case nir_type_${src_t}: + switch (dst_base) { +% for dst_t in ['int', 'uint', 'float']: + case nir_type_${dst_t}: +% if src_t in ['int', 'uint'] and dst_t in ['int', 'uint']: +% if dst_t == 'int': +<% continue %> +% else: +<% dst_t = src_t %> +% endif +% endif + switch (dst_bit_size) { +% for dst_bits in [32, 64]: + case ${dst_bits}: + return ${'nir_op_{}2{}{}'.format(src_t[0], dst_t[0], dst_bits)}; +% endfor + default: + unreachable("Invalid nir alu bit size"); + } +% endfor + case nir_type_bool: +% if src_t == 'float': + return nir_op_f2b; +% else: + return nir_op_i2b; +% endif + default: + unreachable("Invalid nir alu base type"); + } +% endfor + case nir_type_bool: + switch (dst_base) { + case nir_type_int: + case nir_type_uint: + return nir_op_b2i; + case nir_type_float: + return nir_op_b2f; + default: + unreachable("Invalid nir alu base type"); + } + default: + unreachable("Invalid nir alu base type"); + } 
+} + const nir_op_info nir_op_infos[nir_num_opcodes] = { % for name, opcode in sorted(opcodes.iteritems()): { diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index f60c338b624..49c1460e25a 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -78,7 +78,7 @@ optimizations = [ (('ineg', ('ineg', a)), a), (('fabs', ('fabs', a)), ('fabs', a)), (('fabs', ('fneg', a)), ('fabs', a)), - (('fabs', ('u2f', a)), ('u2f', a)), + (('fabs', ('u2f32', a)), ('u2f32', a)), (('iabs', ('iabs', a)), ('iabs', a)), (('iabs', ('ineg', a)), ('iabs', a)), (('~fadd', a, 0.0), a), @@ -212,7 +212,7 @@ optimizations = [ (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))), (('iand', 'a@bool', 1.0), ('b2f', a)), # True/False are ~0 and 0 in NIR. b2i of True is 1, and -1 is ~0 (True). - (('ineg', ('b2i', a)), a), + (('ineg', ('b2i@32', a)), a), (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF. (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF. # Comparison with the same args. 
Note that these are not done for @@ -298,8 +298,8 @@ optimizations = [ # Conversions (('i2b', ('b2i', a)), a), - (('f2i', ('ftrunc', a)), ('f2i', a)), - (('f2u', ('ftrunc', a)), ('f2u', a)), + (('f2i32', ('ftrunc', a)), ('f2i32', a)), + (('f2u32', ('ftrunc', a)), ('f2u32', a)), (('i2b', ('ineg', a)), ('i2b', a)), (('i2b', ('iabs', a)), ('i2b', a)), (('fabs', ('b2f', a)), ('b2f', a)), @@ -387,49 +387,49 @@ optimizations = [ (('pack_unorm_2x16', 'v'), ('pack_uvec2_to_uint', - ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))), + ('f2u32', ('fround_even', ('fmul', ('fsat', 'v'), 65535.0)))), 'options->lower_pack_unorm_2x16'), (('pack_unorm_4x8', 'v'), ('pack_uvec4_to_uint', - ('f2u', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))), + ('f2u32', ('fround_even', ('fmul', ('fsat', 'v'), 255.0)))), 'options->lower_pack_unorm_4x8'), (('pack_snorm_2x16', 'v'), ('pack_uvec2_to_uint', - ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 32767.0)))), + ('f2i32', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 32767.0)))), 'options->lower_pack_snorm_2x16'), (('pack_snorm_4x8', 'v'), ('pack_uvec4_to_uint', - ('f2i', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))), + ('f2i32', ('fround_even', ('fmul', ('fmin', 1.0, ('fmax', -1.0, 'v')), 127.0)))), 'options->lower_pack_snorm_4x8'), (('unpack_unorm_2x16', 'v'), - ('fdiv', ('u2f', ('vec2', ('extract_u16', 'v', 0), - ('extract_u16', 'v', 1))), + ('fdiv', ('u2f32', ('vec2', ('extract_u16', 'v', 0), + ('extract_u16', 'v', 1))), 65535.0), 'options->lower_unpack_unorm_2x16'), (('unpack_unorm_4x8', 'v'), - ('fdiv', ('u2f', ('vec4', ('extract_u8', 'v', 0), - ('extract_u8', 'v', 1), - ('extract_u8', 'v', 2), - ('extract_u8', 'v', 3))), + ('fdiv', ('u2f32', ('vec4', ('extract_u8', 'v', 0), + ('extract_u8', 'v', 1), + ('extract_u8', 'v', 2), + ('extract_u8', 'v', 3))), 255.0), 'options->lower_unpack_unorm_4x8'), (('unpack_snorm_2x16', 'v'), - ('fmin', 1.0, ('fmax', -1.0, ('fdiv', 
('i2f', ('vec2', ('extract_i16', 'v', 0), - ('extract_i16', 'v', 1))), + ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f32', ('vec2', ('extract_i16', 'v', 0), + ('extract_i16', 'v', 1))), 32767.0))), 'options->lower_unpack_snorm_2x16'), (('unpack_snorm_4x8', 'v'), - ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f', ('vec4', ('extract_i8', 'v', 0), - ('extract_i8', 'v', 1), - ('extract_i8', 'v', 2), - ('extract_i8', 'v', 3))), + ('fmin', 1.0, ('fmax', -1.0, ('fdiv', ('i2f32', ('vec4', ('extract_i8', 'v', 0), + ('extract_i8', 'v', 1), + ('extract_i8', 'v', 2), + ('extract_i8', 'v', 3))), 127.0))), 'options->lower_unpack_snorm_4x8'), ] |