diff options
author | Connor Abbott <[email protected]> | 2015-08-14 10:45:06 -0700 |
---|---|---|
committer | Samuel Iglesias Gonsálvez <[email protected]> | 2016-03-17 11:16:33 +0100 |
commit | 9076c4e289de0debf1fb2a7237bdeb9c11002347 (patch) | |
tree | 75e555d2db2023457a9b9c521f396ee88ce5f888 /src/compiler/nir/nir_constant_expressions.py | |
parent | 6700d7e423bb2d7c5f0b46740bd92b5e65679eaf (diff) |
nir: update opcode definitions for different bit sizes
Some opcodes need explicit bitsizes, and sometimes we need to use the
double version when constant folding.
v2: fix output type for u2f (Iago)
v3: do not change vecN opcodes to be float. The next commit will add
infrastructure to enable 64-bit integer constant folding so this isn't
really necessary. Also, that created problems with source modifiers in
some cases (Iago)
v4 (Jason):
- do not change bcsel to work in terms of floats
- leave ldexp generic
Squashed changes to handle different bit sizes when constant
folding since otherwise we would break the build.
v2:
- Use the bit-size information from the opcode information if defined (Iago)
- Use helpers to get type size and base type of nir_alu_type enum (Sam)
- Do not fall back to sized types to guess bit-size information. (Jason)
Squashed changes in i965 and gallium/nir drivers to support sized types.
These functions should only see sized types, but we can't make that change
until we make sure that nir uses the sized versions in all the relevant places.
A later commit will address this.
Signed-off-by: Iago Toral Quiroga <[email protected]>
Signed-off-by: Samuel Iglesias Gonsálvez <[email protected]>
Reviewed-by: Jason Ekstrand <[email protected]>
Reviewed-by: Samuel Iglesias Gonsálvez <[email protected]>
Reviewed-by: Iago Toral Quiroga <[email protected]>
Diffstat (limited to 'src/compiler/nir/nir_constant_expressions.py')
-rw-r--r-- | src/compiler/nir/nir_constant_expressions.py | 246 |
1 files changed, 159 insertions, 87 deletions
diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py index 32784f6398d..972d2819af9 100644 --- a/src/compiler/nir/nir_constant_expressions.py +++ b/src/compiler/nir/nir_constant_expressions.py @@ -1,4 +1,43 @@ #! /usr/bin/python2 + +def type_has_size(type_): + return type_[-1:].isdigit() + +def type_sizes(type_): + if type_.endswith("8"): + return [8] + elif type_.endswith("16"): + return [16] + elif type_.endswith("32"): + return [32] + elif type_.endswith("64"): + return [64] + else: + return [32, 64] + +def type_add_size(type_, size): + if type_has_size(type_): + return type_ + return type_ + str(size) + +def get_const_field(type_): + if type_ == "int32": + return "i" + if type_ == "uint32": + return "u" + if type_ == "int64": + return "l" + if type_ == "uint64": + return "ul" + if type_ == "bool32": + return "b" + if type_ == "float32": + return "f" + if type_ == "float64": + return "d" + raise Exception(str(type_)) + assert(0) + template = """\ /* * Copyright (C) 2014 Intel Corporation @@ -205,110 +244,140 @@ unpack_half_1x16(uint16_t u) } /* Some typed vector structures to make things like src0.y work */ -% for type in ["float", "int", "uint", "bool"]: -struct ${type}_vec { - ${type} x; - ${type} y; - ${type} z; - ${type} w; +typedef float float32_t; +typedef double float64_t; +typedef bool bool32_t; +% for type in ["float", "int", "uint"]: +% for width in [32, 64]: +struct ${type}${width}_vec { + ${type}${width}_t x; + ${type}${width}_t y; + ${type}${width}_t z; + ${type}${width}_t w; }; % endfor +% endfor + +struct bool32_vec { + bool x; + bool y; + bool z; + bool w; +}; % for name, op in sorted(opcodes.iteritems()): static nir_const_value -evaluate_${name}(unsigned num_components, nir_const_value *_src) +evaluate_${name}(unsigned num_components, unsigned bit_size, + nir_const_value *_src) { nir_const_value _dst_val = { { {0, 0, 0, 0} } }; - ## For each non-per-component input, create a variable srcN that 
- ## contains x, y, z, and w elements which are filled in with the - ## appropriately-typed values. - % for j in range(op.num_inputs): - % if op.input_sizes[j] == 0: - <% continue %> - % elif "src" + str(j) not in op.const_expr: - ## Avoid unused variable warnings - <% continue %> - %endif - - struct ${op.input_types[j]}_vec src${j} = { - % for k in range(op.input_sizes[j]): - % if op.input_types[j] == "bool": - _src[${j}].u[${k}] != 0, - % else: - _src[${j}].${op.input_types[j][:1]}[${k}], - % endif - % endfor - }; - % endfor + switch (bit_size) { + % for bit_size in [32, 64]: + case ${bit_size}: { + <% + output_type = type_add_size(op.output_type, bit_size) + input_types = [type_add_size(type_, bit_size) for type_ in op.input_types] + %> + + ## For each non-per-component input, create a variable srcN that + ## contains x, y, z, and w elements which are filled in with the + ## appropriately-typed values. + % for j in range(op.num_inputs): + % if op.input_sizes[j] == 0: + <% continue %> + % elif "src" + str(j) not in op.const_expr: + ## Avoid unused variable warnings + <% continue %> + %endif - % if op.output_size == 0: - ## For per-component instructions, we need to iterate over the - ## components and apply the constant expression one component - ## at a time. - for (unsigned _i = 0; _i < num_components; _i++) { - ## For each per-component input, create a variable srcN that - ## contains the value of the current (_i'th) component. 
- % for j in range(op.num_inputs): - % if op.input_sizes[j] != 0: - <% continue %> - % elif "src" + str(j) not in op.const_expr: - ## Avoid unused variable warnings - <% continue %> - % elif op.input_types[j] == "bool": - bool src${j} = _src[${j}].u[_i] != 0; + struct ${input_types[j]}_vec src${j} = { + % for k in range(op.input_sizes[j]): + % if input_types[j] == "bool32": + _src[${j}].u[${k}] != 0, % else: - ${op.input_types[j]} src${j} = _src[${j}].${op.input_types[j][:1]}[_i]; + _src[${j}].${get_const_field(input_types[j])}[${k}], % endif % endfor + }; + % endfor + + % if op.output_size == 0: + ## For per-component instructions, we need to iterate over the + ## components and apply the constant expression one component + ## at a time. + for (unsigned _i = 0; _i < num_components; _i++) { + ## For each per-component input, create a variable srcN that + ## contains the value of the current (_i'th) component. + % for j in range(op.num_inputs): + % if op.input_sizes[j] != 0: + <% continue %> + % elif "src" + str(j) not in op.const_expr: + ## Avoid unused variable warnings + <% continue %> + % elif input_types[j] == "bool32": + bool src${j} = _src[${j}].u[_i] != 0; + % else: + ${input_types[j]}_t src${j} = + _src[${j}].${get_const_field(input_types[j])}[_i]; + % endif + % endfor + + ## Create an appropriately-typed variable dst and assign the + ## result of the const_expr to it. If const_expr already contains + ## writes to dst, just include const_expr directly. + % if "dst" in op.const_expr: + ${output_type}_t dst; + ${op.const_expr} + % else: + ${output_type}_t dst = ${op.const_expr}; + % endif + + ## Store the current component of the actual destination to the + ## value of dst. + % if output_type == "bool32": + ## Sanitize the C value to a proper NIR bool + _dst_val.u[_i] = dst ? 
NIR_TRUE : NIR_FALSE; + % else: + _dst_val.${get_const_field(output_type)}[_i] = dst; + % endif + } + % else: + ## In the non-per-component case, create a struct dst with + ## appropriately-typed elements x, y, z, and w and assign the result + ## of the const_expr to all components of dst, or include the + ## const_expr directly if it writes to dst already. + struct ${output_type}_vec dst; - ## Create an appropriately-typed variable dst and assign the - ## result of the const_expr to it. If const_expr already contains - ## writes to dst, just include const_expr directly. % if "dst" in op.const_expr: - ${op.output_type} dst; ${op.const_expr} % else: - ${op.output_type} dst = ${op.const_expr}; + ## Splat the value to all components. This way expressions which + ## write the same value to all components don't need to explicitly + ## write to dest. One such example is fnoise which has a + ## const_expr of 0.0f. + dst.x = dst.y = dst.z = dst.w = ${op.const_expr}; % endif - ## Store the current component of the actual destination to the - ## value of dst. - % if op.output_type == "bool": - ## Sanitize the C value to a proper NIR bool - _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE; - % else: - _dst_val.${op.output_type[:1]}[_i] = dst; - % endif - } - % else: - ## In the non-per-component case, create a struct dst with - ## appropriately-typed elements x, y, z, and w and assign the result - ## of the const_expr to all components of dst, or include the - ## const_expr directly if it writes to dst already. - struct ${op.output_type}_vec dst; - - % if "dst" in op.const_expr: - ${op.const_expr} - % else: - ## Splat the value to all components. This way expressions which - ## write the same value to all components don't need to explicitly - ## write to dest. One such example is fnoise which has a - ## const_expr of 0.0f. - dst.x = dst.y = dst.z = dst.w = ${op.const_expr}; + ## For each component in the destination, copy the value of dst to + ## the actual destination. 
+ % for k in range(op.output_size): + % if output_type == "bool32": + ## Sanitize the C value to a proper NIR bool + _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE; + % else: + _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]}; + % endif + % endfor % endif - ## For each component in the destination, copy the value of dst to - ## the actual destination. - % for k in range(op.output_size): - % if op.output_type == "bool": - ## Sanitize the C value to a proper NIR bool - _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE; - % else: - _dst_val.${op.output_type[:1]}[${k}] = dst.${"xyzw"[k]}; - % endif - % endfor - % endif + break; + } + % endfor + + default: + unreachable("unknown bit width"); + } return _dst_val; } @@ -316,12 +385,12 @@ evaluate_${name}(unsigned num_components, nir_const_value *_src) nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components, - nir_const_value *src) + unsigned bit_width, nir_const_value *src) { switch (op) { % for name in sorted(opcodes.iterkeys()): case nir_op_${name}: { - return evaluate_${name}(num_components, src); + return evaluate_${name}(num_components, bit_width, src); break; } % endfor @@ -333,4 +402,7 @@ nir_eval_const_opcode(nir_op op, unsigned num_components, from nir_opcodes import opcodes from mako.template import Template -print Template(template).render(opcodes=opcodes) +print Template(template).render(opcodes=opcodes, type_sizes=type_sizes, + type_has_size=type_has_size, + type_add_size=type_add_size, + get_const_field=get_const_field) |