/*
 * Copyright © 2019 Google, Inc
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_precision.cpp
 */

#include "main/macros.h"
#include "main/mtypes.h"
#include "compiler/glsl_types.h"
#include "ir.h"
#include "ir_builder.h"
#include "ir_optimization.h"
#include "ir_rvalue_visitor.h"
#include "util/half_float.h"
#include "util/set.h"
#include "util/hash_table.h"
#include <vector>

namespace {

/**
 * Visitor that consumes the set of lowerable rvalues, rewrites each of them
 * with lower_precision_visitor, and swaps calls to builtins whose return
 * temporary was marked mediump/lowp for lowered clones of those builtins.
 */
class find_precision_visitor : public ir_rvalue_enter_visitor {
public:
   find_precision_visitor(const struct gl_shader_compiler_options *options);
   ~find_precision_visitor();

   virtual void handle_rvalue(ir_rvalue **rvalue);
   virtual ir_visitor_status visit_enter(ir_call *ir);

   ir_function_signature *map_builtin(ir_function_signature *sig);

   /* Set of rvalues that can be lowered. This will be filled in by
    * find_lowerable_rvalues_visitor. Only the root node of a lowerable section
    * will be added to this set.
    */
   struct set *lowerable_rvalues;

   /**
    * A mapping of builtin signature functions to lowered versions. This is
    * filled in lazily when a lowered version is needed.
    */
   struct hash_table *lowered_builtins;
   /**
    * A temporary hash table only used in order to clone functions.
    */
   struct hash_table *clone_ht;

   void *lowered_builtin_mem_ctx;

   const struct gl_shader_compiler_options *options;
};

/**
 * Visitor that walks the IR keeping a stack of the instructions currently
 * being visited, decides for each one whether it can be lowered, and records
 * the topmost lowerable rvalues in \c lowerable_rvalues.
 */
class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
public:
   enum can_lower_state {
      UNKNOWN,
      CANT_LOWER,
      SHOULD_LOWER,
   };

   enum parent_relation {
      /* The parent performs a further operation involving the result from the
       * child and can be lowered along with it.
       */
      COMBINED_OPERATION,
      /* The parent instruction's operation is independent of the child type so
       * the child should be lowered separately.
       */
      INDEPENDENT_OPERATION,
   };

   struct stack_entry {
      ir_instruction *instr;
      enum can_lower_state state;
      /* List of child rvalues that can be lowered. When this stack entry is
       * popped, if this node itself can't be lowered then all of the children
       * are root nodes to lower so we will add them to lowerable_rvalues.
       * Otherwise if this node can also be lowered then we won't add the
       * children because we only want to add the topmost lowerable nodes to
       * lowerable_rvalues and the children will be lowered as part of lowering
       * this node.
       */
      std::vector<ir_instruction *> lowerable_children;
   };

   find_lowerable_rvalues_visitor(struct set *result,
                                  const struct gl_shader_compiler_options *options);

   static void stack_enter(class ir_instruction *ir, void *data);
   static void stack_leave(class ir_instruction *ir, void *data);

   virtual ir_visitor_status visit(ir_constant *ir);
   virtual ir_visitor_status visit(ir_dereference_variable *ir);

   virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
   virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_enter(ir_expression *ir);

   virtual ir_visitor_status visit_leave(ir_assignment *ir);
   virtual ir_visitor_status visit_leave(ir_call *ir);

   can_lower_state handle_precision(const glsl_type *type,
                                    int precision) const;

   static parent_relation get_parent_relation(ir_instruction *parent,
                                              ir_instruction *child);

   /* One entry per instruction currently being visited; back() is the
    * innermost one.
    */
   std::vector<stack_entry> stack;
   struct set *lowerable_rvalues;
   const struct gl_shader_compiler_options *options;

   void pop_stack_entry();
   void add_lowerable_children(const stack_entry &entry);
};

/**
 * Visitor applied to a single lowerable expression tree. It rewrites the
 * types within the tree to their 16-bit equivalents and inserts down
 * conversions on non-boolean dereferences.
 */
class lower_precision_visitor : public ir_rvalue_visitor {
public:
   virtual void handle_rvalue(ir_rvalue **rvalue);
   virtual ir_visitor_status visit_enter(ir_dereference_array *);
   virtual ir_visitor_status visit_enter(ir_dereference_record *);
   virtual ir_visitor_status visit_enter(ir_call *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_leave(ir_expression *);
};

/**
 * Return whether values of \p type (ignoring array-ness) may be lowered
 * under the given compiler \p options.
 */
static bool
can_lower_type(const struct gl_shader_compiler_options *options,
               const glsl_type *type)
{
   /* Don't lower any expressions involving non-float types except bool and
    * texture samplers. This will rule out operations that change the type such
    * as conversion to ints. Instead it will end up lowering the arguments
    * instead and adding a final conversion to float32. We want to handle
    * boolean types so that it will do comparisons as 16-bit.
    */

   switch (type->without_array()->base_type) {
   /* TODO: should we do anything for these two with regard to Int16 vs FP16
    * support?
    */
   case GLSL_TYPE_BOOL:
   case GLSL_TYPE_SAMPLER:
   case GLSL_TYPE_IMAGE:
      return true;

   case GLSL_TYPE_FLOAT:
      return options->LowerPrecisionFloat16;

   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
      return options->LowerPrecisionInt16;

   default:
      return false;
   }
}

/**
 * Store the result set and options, and register stack_enter/stack_leave as
 * the enter/leave callbacks with this visitor as their data pointer.
 */
find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res,
                                 const struct gl_shader_compiler_options *opts)
{
   lowerable_rvalues = res;
   options = opts;
   callback_enter = stack_enter;
   callback_leave = stack_leave;
   data_enter = this;
   data_leave = this;
}

/**
 * Enter callback: push a stack entry for \p ir. Instructions reached while
 * the visitor is inside an assignee start out as CANT_LOWER.
 *
 * \param data  the find_lowerable_rvalues_visitor that owns the stack
 */
void
find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir,
                                            void *data)
{
   find_lowerable_rvalues_visitor *state =
      (find_lowerable_rvalues_visitor *) data;

   /* Add a new stack entry for this instruction */
   stack_entry entry;

   entry.instr = ir;
   entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN;

   state->stack.push_back(entry);
}

/**
 * Add every pending lowerable child of \p entry to the result set.
 */
void
find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry)
{
   /* We can't lower this node so if there were any pending children then they
    * are all root lowerable nodes and we should add them to the set.
    */
   for (auto &it : entry.lowerable_children)
      _mesa_set_add(lowerable_rvalues, it);
}

/**
 * Pop the innermost stack entry, merging its state into the parent entry
 * (when the two form a combined operation) and recording lowerable roots.
 */
void
find_lowerable_rvalues_visitor::pop_stack_entry()
{
   const stack_entry &entry = stack.back();

   if (stack.size() >= 2) {
      /* Combine this state into the parent state, unless the parent operation
       * doesn't have any relation to the child operations
       */
      stack_entry &parent = stack.end()[-2];
      parent_relation rel = get_parent_relation(parent.instr, entry.instr);

      if (rel == COMBINED_OPERATION) {
         switch (entry.state) {
         case CANT_LOWER:
            parent.state = CANT_LOWER;
            break;
         case SHOULD_LOWER:
            if (parent.state == UNKNOWN)
               parent.state = SHOULD_LOWER;
            break;
         case UNKNOWN:
            break;
         }
      }
   }

   if (entry.state == SHOULD_LOWER) {
      ir_rvalue *rv = entry.instr->as_rvalue();

      if (rv == NULL) {
         add_lowerable_children(entry);
      } else if (stack.size() >= 2) {
         stack_entry &parent = stack.end()[-2];

         switch (get_parent_relation(parent.instr, rv)) {
         case COMBINED_OPERATION:
            /* We only want to add the toplevel lowerable instructions to the
             * lowerable set. Therefore if there is a parent then instead of
             * adding this instruction to the set we will queue depending on
             * the result of the parent instruction.
             */
            parent.lowerable_children.push_back(entry.instr);
            break;
         case INDEPENDENT_OPERATION:
            _mesa_set_add(lowerable_rvalues, rv);
            break;
         }
      } else {
         /* This is a toplevel node so add it directly to the lowerable
          * set.
          */
         _mesa_set_add(lowerable_rvalues, rv);
      }
   } else if (entry.state == CANT_LOWER) {
      add_lowerable_children(entry);
   }

   stack.pop_back();
}

/**
 * Leave callback: pop the stack entry pushed by the matching stack_enter.
 *
 * \param data  the find_lowerable_rvalues_visitor that owns the stack
 */
void
find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir,
                                            void *data)
{
   find_lowerable_rvalues_visitor *state =
      (find_lowerable_rvalues_visitor *) data;

   state->pop_stack_entry();
}

/**
 * Map a type and a GLSL precision qualifier to a lowering decision.
 *
 * Types rejected by can_lower_type() and highp values yield CANT_LOWER,
 * mediump/lowp yield SHOULD_LOWER, and no precision yields UNKNOWN.
 */
enum find_lowerable_rvalues_visitor::can_lower_state
find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
                                                 int precision) const
{
   if (!can_lower_type(options, type))
      return CANT_LOWER;

   switch (precision) {
   case GLSL_PRECISION_NONE:
      return UNKNOWN;
   case GLSL_PRECISION_HIGH:
      return CANT_LOWER;
   case GLSL_PRECISION_MEDIUM:
   case GLSL_PRECISION_LOW:
      return SHOULD_LOWER;
   }

   return CANT_LOWER;
}

/**
 * Classify how \p parent relates to its children: dereferences and texture
 * instructions are independent of them, everything else is combined.
 * \p child is currently not inspected.
 */
enum find_lowerable_rvalues_visitor::parent_relation
find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent,
                                                    ir_instruction *child)
{
   /* If the parent is a dereference instruction then the only child could be
    * for example an array dereference and that should be lowered independently
    * of the parent.
    */
   if (parent->as_dereference())
      return INDEPENDENT_OPERATION;

   /* The precision of texture sampling depends on the precision of the
    * sampler. The rest of the arguments don't matter so we can treat it as an
    * independent operation.
    */
   if (parent->as_texture())
      return INDEPENDENT_OPERATION;

   return COMBINED_OPERATION;
}

/**
 * Constants are leaves, so the stack entry is pushed and popped by hand.
 * A constant only forces CANT_LOWER when its type cannot be lowered.
 */
ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_constant *ir)
{
   stack_enter(ir, this);

   if (!can_lower_type(options, ir->type))
      stack.back().state = CANT_LOWER;

   stack_leave(ir, this);

   return visit_continue;
}

/**
 * Variable dereferences are leaves; their state comes from the precision of
 * the dereference unless stack_enter already decided it.
 */
ir_visitor_status
find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
{
   stack_enter(ir, this);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   stack_leave(ir, this);

   return visit_continue;
}

/**
 * Derive the state of a record dereference from its own type and precision
 * when it is still UNKNOWN.
 */
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}

/**
 * Derive the state of an array dereference from its own type and precision
 * when it is still UNKNOWN.
 */
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (stack.back().state == UNKNOWN)
      stack.back().state = handle_precision(ir->type, ir->precision());

   return visit_continue;
}

/**
 * Set the state of a texture instruction from the precision of its sampler,
 * overriding whatever stack_enter chose.
 */
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   /* The precision of the sample value depends on the precision of the
    * sampler.
    */
   stack.back().state = handle_precision(ir->type,
                                         ir->sampler->precision());
   return visit_continue;
}

/**
 * Mark an expression CANT_LOWER when its result type cannot be lowered, or
 * when it is a derivative and derivative lowering is disabled.
 */
ir_visitor_status
find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
{
   ir_hierarchical_visitor::visit_enter(ir);

   if (!can_lower_type(options, ir->type))
      stack.back().state = CANT_LOWER;

   /* Don't lower precision for derivative calculations */
   if (!options->LowerPrecisionDerivatives &&
       (ir->operation == ir_unop_dFdx ||
        ir->operation == ir_unop_dFdx_coarse ||
        ir->operation == ir_unop_dFdx_fine ||
        ir->operation == ir_unop_dFdy ||
        ir->operation == ir_unop_dFdy_coarse ||
        ir->operation == ir_unop_dFdy_fine)) {
      stack.back().state = CANT_LOWER;
   }

   return visit_continue;
}

/**
 * Return true for the builtins whose result is treated as mediump/lowp
 * regardless of the precision of their parameters.
 */
static bool
function_always_returns_mediump_or_lowp(const char *name)
{
   return !strcmp(name, "bitCount") ||
          !strcmp(name, "findLSB") ||
          !strcmp(name, "findMSB") ||
          !strcmp(name, "unpackHalf2x16") ||
          !strcmp(name, "unpackUnorm4x8") ||
          !strcmp(name, "unpackSnorm4x8");
}

/**
 * Decide whether the call \p ir is to a builtin whose return value should be
 * treated as mediump.
 *
 * \param lowerable_rvalues  set of rvalues already found to be lowerable;
 *                           non-constant actual parameters are looked up
 *                           in it
 */
static bool
is_lowerable_builtin(ir_call *ir,
                     const struct set *lowerable_rvalues)
{
   /* The intrinsic call is inside the wrapper imageLoad function that will
    * be inlined. We have to handle both of them.
    */
   if (ir->callee->intrinsic_id == ir_intrinsic_image_load ||
       (ir->callee->is_builtin() &&
        !strcmp(ir->callee_name(), "imageLoad"))) {
      ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
      ir_variable *resource = param->variable_referenced();

      assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
      assert(resource->type->without_array()->is_image());

      /* GLSL ES 3.20 requires that images have a precision modifier, but if
       * you set one, it doesn't do anything, because all intrinsics are
       * defined with highp. This seems to be a spec bug.
       *
       * In theory we could set the return value to mediump if the image
       * format has a lower precision. This appears to be the most sensible
       * thing to do.
       */
      const struct util_format_description *desc =
         util_format_description(resource->data.image_format);
      unsigned i =
         util_format_get_first_non_void_channel(resource->data.image_format);

      if (desc->channel[i].pure_integer ||
          desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
         return desc->channel[i].size <= 16;
      else
         return desc->channel[i].size <= 10; /* unorm/snorm */
   }

   /* Handle special calls. */
   if (ir->callee->is_builtin() && ir->actual_parameters.length()) {
      ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
      ir_variable *var = param->variable_referenced();

      /* Handle builtin wrappers around ir_texture opcodes. These wrappers will
       * be inlined by lower_precision() if we return true here, so that we can
       * get to ir_texture later and do proper lowering.
       *
       * We should lower the type of the return value if the sampler type
       * uses lower precision. The function parameters don't matter.
       */
      if (var && var->type->without_array()->is_sampler()) {
         /* textureSize always returns highp. */
         if (!strcmp(ir->callee_name(), "textureSize"))
            return false;

         return var->data.precision == GLSL_PRECISION_MEDIUM ||
                var->data.precision == GLSL_PRECISION_LOW;
      }
   }

   if (!ir->callee->is_builtin() ||
       /* Parameters are always highp: */
       !strcmp(ir->callee_name(), "floatBitsToInt") ||
       !strcmp(ir->callee_name(), "floatBitsToUint") ||
       !strcmp(ir->callee_name(), "intBitsToFloat") ||
       !strcmp(ir->callee_name(), "uintBitsToFloat") ||
       !strcmp(ir->callee_name(), "bitfieldReverse") ||
       !strcmp(ir->callee_name(), "frexp") ||
       !strcmp(ir->callee_name(), "ldexp") ||
       /* Parameters and outputs are always highp: */
       /* TODO: The operations are highp, but carry and borrow outputs are lowp. */
       !strcmp(ir->callee_name(), "uaddCarry") ||
       !strcmp(ir->callee_name(), "usubBorrow") ||
       !strcmp(ir->callee_name(), "imulExtended") ||
       !strcmp(ir->callee_name(), "umulExtended") ||
       !strcmp(ir->callee_name(), "unpackUnorm2x16") ||
       !strcmp(ir->callee_name(), "unpackSnorm2x16") ||
       /* Outputs are highp: */
       !strcmp(ir->callee_name(), "packUnorm2x16") ||
       !strcmp(ir->callee_name(), "packSnorm2x16") ||
       /* Parameters are mediump and outputs are highp. The parameters should
        * be optimized in NIR, not here, e.g:
        * - packHalf2x16 can just be a bitcast from f16vec2 to uint32
        * - Other opcodes don't have to convert parameters to highp if the hw
        *   has f16 versions. Optimize in NIR accordingly.
        */
       !strcmp(ir->callee_name(), "packHalf2x16") ||
       !strcmp(ir->callee_name(), "packUnorm4x8") ||
       !strcmp(ir->callee_name(), "packSnorm4x8"))
      return false;

   assert(ir->callee->return_precision == GLSL_PRECISION_NONE);

   /* Number of parameters to check if they are lowerable. */
   unsigned check_parameters = ir->actual_parameters.length();

   /* Interpolation functions only consider the precision of the interpolant. */
   /* Bitfield functions ignore the precision of "offset" and "bits". */
   if (!strcmp(ir->callee_name(), "interpolateAtOffset") ||
       !strcmp(ir->callee_name(), "interpolateAtSample") ||
       !strcmp(ir->callee_name(), "bitfieldExtract")) {
      check_parameters = 1;
   } else if (!strcmp(ir->callee_name(), "bitfieldInsert")) {
      check_parameters = 2;
   }

   if (function_always_returns_mediump_or_lowp(ir->callee_name())) {
      /* These only lower the return value. Parameters keep their precision,
       * which is preserved in map_builtin.
       */
      check_parameters = 0;
   }

   /* Every checked parameter must be a constant or already lowerable. */
   foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
      if (!check_parameters)
         break;

      if (!param->as_constant() &&
          _mesa_set_search(lowerable_rvalues, param) == NULL)
         return false;

      --check_parameters;
   }

   return true;
}

/**
 * After visiting a call, assign a precision to the temporary that receives
 * its return value: mediump when the return value should be lowered, highp
 * otherwise.
 */
ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Special case for handling temporary variables generated by the compiler
    * for function calls. If we assign to one of these using a function call
    * that has a lowerable return type then we can assume the temporary
    * variable should have a medium precision too.
    */

   /* Do nothing if the return type is void. */
   if (!ir->return_deref)
      return visit_continue;

   ir_variable *var = ir->return_deref->variable_referenced();

   assert(var->data.mode == ir_var_temporary);

   unsigned return_precision = ir->callee->return_precision;

   /* If the call is to a builtin, then the function won't have a return
    * precision and we should determine it from the precision of the arguments.
    */
   if (is_lowerable_builtin(ir, lowerable_rvalues))
      return_precision = GLSL_PRECISION_MEDIUM;

   can_lower_state lower_state =
      handle_precision(var->type, return_precision);

   if (lower_state == SHOULD_LOWER) {
      /* There probably shouldn't be any situations where multiple ir_call
       * instructions write to the same temporary?
       */
      assert(var->data.precision == GLSL_PRECISION_NONE);
      var->data.precision = GLSL_PRECISION_MEDIUM;
   } else {
      var->data.precision = GLSL_PRECISION_HIGH;
   }

   return visit_continue;
}

/**
 * After visiting an assignment to a compiler temporary, derive the
 * temporary's precision from whether the right-hand side is lowerable.
 */
ir_visitor_status
find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
{
   ir_hierarchical_visitor::visit_leave(ir);

   /* Special case for handling temporary variables generated by the compiler.
    * If we assign to one of these using a lowered precision then we can assume
    * the temporary variable should have a medium precision too.
    */
   ir_variable *var = ir->lhs->variable_referenced();

   if (var->data.mode == ir_var_temporary) {
      if (_mesa_set_search(lowerable_rvalues, ir->rhs)) {
         /* Only override the precision if this is the first assignment. For
          * temporaries such as the ones generated for the ?: operator there
          * can be multiple assignments with different precisions. This way we
          * get the highest precision of all of the assignments.
          */
         if (var->data.precision == GLSL_PRECISION_NONE)
            var->data.precision = GLSL_PRECISION_MEDIUM;
      } else if (!ir->rhs->as_constant()) {
         var->data.precision = GLSL_PRECISION_HIGH;
      }
   }

   return visit_continue;
}

/**
 * Run find_lowerable_rvalues_visitor over \p instructions, filling
 * \p result with the root rvalues that can be lowered.
 */
void
find_lowerable_rvalues(const struct gl_shader_compiler_options *options,
                       exec_list *instructions,
                       struct set *result)
{
   find_lowerable_rvalues_visitor v(result, options);

   visit_list_elements(&v, instructions);

   assert(v.stack.empty());
}

/**
 * Return the type matching \p type with its base type converted between the
 * 16-bit and 32-bit float/int/uint variants. Arrays are converted
 * element-wise.
 *
 * \param up  true converts 16-bit to 32-bit, false converts 32-bit to 16-bit
 */
static const glsl_type *
convert_type(bool up, const glsl_type *type)
{
   if (type->is_array()) {
      return glsl_type::get_array_instance(convert_type(up, type->fields.array),
                                           type->array_size(),
                                           type->explicit_stride);
   }

   glsl_base_type new_base_type;

   if (up) {
      switch (type->base_type) {
      case GLSL_TYPE_FLOAT16:
         new_base_type = GLSL_TYPE_FLOAT;
         break;
      case GLSL_TYPE_INT16:
         new_base_type = GLSL_TYPE_INT;
         break;
      case GLSL_TYPE_UINT16:
         new_base_type = GLSL_TYPE_UINT;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   } else {
      switch (type->base_type) {
      case GLSL_TYPE_FLOAT:
         new_base_type = GLSL_TYPE_FLOAT16;
         break;
      case GLSL_TYPE_INT:
         new_base_type = GLSL_TYPE_INT16;
         break;
      case GLSL_TYPE_UINT:
         new_base_type = GLSL_TYPE_UINT16;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   }

   return glsl_type::get_instance(new_base_type,
                                  type->vector_elements,
                                  type->matrix_columns,
                                  type->explicit_stride,
                                  type->interface_row_major);
}

/** Shorthand for the 32-bit to 16-bit direction of convert_type(). */
static const glsl_type *
lower_glsl_type(const glsl_type *type)
{
   return convert_type(false, type);
}

/**
 * Wrap \p ir in a unary conversion expression to the other bit size. The
 * new expression is allocated from the ralloc parent of \p ir.
 *
 * \param up  true emits a 16-bit to 32-bit conversion, false the reverse
 */
static ir_rvalue *
convert_precision(bool up, ir_rvalue *ir)
{
   unsigned op;

   if (up) {
      switch (ir->type->without_array()->base_type) {
      case GLSL_TYPE_FLOAT16:
         op = ir_unop_f162f;
         break;
      case GLSL_TYPE_INT16:
         op = ir_unop_i2i;
         break;
      case GLSL_TYPE_UINT16:
         op = ir_unop_u2u;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   } else {
      switch (ir->type->without_array()->base_type) {
      case GLSL_TYPE_FLOAT:
         op = ir_unop_f2fmp;
         break;
      case GLSL_TYPE_INT:
         op = ir_unop_i2imp;
         break;
      case GLSL_TYPE_UINT:
         op = ir_unop_u2ump;
         break;
      default:
         unreachable("invalid type");
         return NULL;
      }
   }

   const glsl_type *desired_type = convert_type(up, ir->type);
   void *mem_ctx = ralloc_parent(ir);
   return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
}

/**
 * Lower one rvalue inside a lowerable tree: non-boolean dereferences get a
 * down conversion, other 32-bit rvalues have their type rewritten in place,
 * and constants additionally have their stored data converted.
 */
void
lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   ir_rvalue *ir = *rvalue;

   if (ir == NULL)
      return;

   if (ir->as_dereference()) {
      if (!ir->type->is_boolean())
         *rvalue = convert_precision(false, ir);
   } else if (ir->type->is_32bit()) {
      ir->type = lower_glsl_type(ir->type);

      ir_constant *const_ir = ir->as_constant();

      if (const_ir) {
         ir_constant_data value;

         if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
               value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
         } else if (ir->type->base_type == GLSL_TYPE_INT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
               value.i16[i] = const_ir->value.i[i];
         } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
            for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
               value.u16[i] = const_ir->value.u[i];
         } else {
            unreachable("invalid type");
         }

         const_ir->value = value;
      }
   }
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_record *ir)
{
   /* We don't want to lower the variable */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_dereference_array *ir)
{
   /* We don't want to convert the array index or the variable. If the array
    * index itself is lowerable that will be handled separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_call *ir)
{
   /* We don't want to convert the arguments. These will be handled separately.
    */
   return visit_continue_with_parent;
}

ir_visitor_status
lower_precision_visitor::visit_enter(ir_texture *ir)
{
   /* We don't want to convert the arguments. These will be handled separately.
    */
   return visit_continue_with_parent;
}

/**
 * After the operands of an expression were lowered, switch float<->bool
 * conversions to their 16-bit opcodes.
 */
ir_visitor_status
lower_precision_visitor::visit_leave(ir_expression *ir)
{
   ir_rvalue_visitor::visit_leave(ir);

   /* If the expression is a conversion operation to or from bool then fix the
    * operation.
    */
   switch (ir->operation) {
   case ir_unop_b2f:
      ir->operation = ir_unop_b2f16;
      break;
   case ir_unop_f2b:
      ir->operation = ir_unop_f162b;
      break;
   case ir_unop_b2i:
   case ir_unop_i2b:
      /* Nothing to do - they both support int16. */
      break;
   default:
      break;
   }

   return visit_continue;
}

/**
 * If \p rvalue is a recorded lowerable root, remove it from the set, lower
 * the tree below it and convert the result back up to 32 bits.
 */
void
find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   /* Checking the precision of rvalue can be lowered first throughout
    * find_lowerable_rvalues_visitor.
    * Once it found the precision of rvalue can be lowered, then we can
    * add conversion f2fmp, etc. through lower_precision_visitor.
    */
   if (*rvalue == NULL)
      return;

   struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue);

   if (!entry)
      return;

   _mesa_set_remove(lowerable_rvalues, entry);

   /* If the entire expression is just a variable dereference then trying to
    * lower it will just directly add pointless to and from conversions without
    * any actual operation in-between. Although these will eventually get
    * optimised out, avoiding generating them here also avoids breaking inout
    * parameters to functions.
    */
   if ((*rvalue)->as_dereference())
      return;

   lower_precision_visitor v;

   (*rvalue)->accept(&v);
   v.handle_rvalue(rvalue);

   /* We don't need to add the final conversion if the final type has been
    * converted to bool
    */
   if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL) {
      *rvalue = convert_precision(true, *rvalue);
   }
}

/**
 * Replace a call to a builtin whose return temporary is mediump/lowp with an
 * inlined, lowered clone of that builtin, then remove the call.
 */
ir_visitor_status
find_precision_visitor::visit_enter(ir_call *ir)
{
   ir_rvalue_enter_visitor::visit_enter(ir);

   ir_variable *return_var =
      ir->return_deref ? ir->return_deref->variable_referenced() : NULL;

   /* Don't do anything for image_load here. We have only changed the return
    * value to mediump/lowp, so that following instructions can use reduced
    * precision.
    *
    * The return value type of the intrinsic itself isn't changed here, but
    * can be changed in NIR if all users use the *2*mp opcode.
    */
   if (ir->callee->intrinsic_id == ir_intrinsic_image_load)
      return visit_continue;

   /* If this is a call to a builtin and the find_lowerable_rvalues_visitor
    * overrode the precision of the temporary return variable, then we can
    * replace the builtin implementation with a lowered version.
    */

   if (!ir->callee->is_builtin() ||
       return_var == NULL ||
       (return_var->data.precision != GLSL_PRECISION_MEDIUM &&
        return_var->data.precision != GLSL_PRECISION_LOW))
      return visit_continue;

   ir->callee = map_builtin(ir->callee);
   ir->generate_inline(ir);
   ir->remove();

   return visit_continue_with_parent;
}

/**
 * Return the lowered clone of builtin signature \p sig, creating and caching
 * it (and the backing tables and memory context) on first use.
 */
ir_function_signature *
find_precision_visitor::map_builtin(ir_function_signature *sig)
{
   if (lowered_builtins == NULL) {
      lowered_builtins = _mesa_pointer_hash_table_create(NULL);
      clone_ht = _mesa_pointer_hash_table_create(NULL);
      lowered_builtin_mem_ctx = ralloc_context(NULL);
   } else {
      struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig);
      if (entry)
         return (ir_function_signature *) entry->data;
   }

   ir_function_signature *lowered_sig =
      sig->clone(lowered_builtin_mem_ctx, clone_ht);

   /* Functions that always return mediump or lowp should keep their
    * parameters intact, because they can be highp. NIR can lower
    * the up-conversion for parameters if needed.
    */
   if (!function_always_returns_mediump_or_lowp(sig->function_name())) {
      foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
         param->data.precision = GLSL_PRECISION_MEDIUM;
      }
   }

   lower_precision(options, &lowered_sig->body);

   _mesa_hash_table_clear(clone_ht, NULL);

   _mesa_hash_table_insert(lowered_builtins, sig, lowered_sig);

   return lowered_sig;
}

/**
 * Create the (initially empty) set of lowerable rvalues; the builtin cache
 * is allocated lazily by map_builtin().
 */
find_precision_visitor::find_precision_visitor(const struct gl_shader_compiler_options *options)
   : lowerable_rvalues(_mesa_pointer_set_create(NULL)),
     lowered_builtins(NULL),
     clone_ht(NULL),
     lowered_builtin_mem_ctx(NULL),
     options(options)
{
}

/**
 * Destroy the rvalue set and, if map_builtin() ever ran, the builtin cache,
 * the clone table and the memory context holding the lowered clones.
 */
find_precision_visitor::~find_precision_visitor()
{
   _mesa_set_destroy(lowerable_rvalues, NULL);

   if (lowered_builtins) {
      _mesa_hash_table_destroy(lowered_builtins, NULL);
      _mesa_hash_table_destroy(clone_ht, NULL);
      ralloc_free(lowered_builtin_mem_ctx);
   }
}

/* Lowering opcodes to 16 bits is not enough for programs with control flow
 * (and the ?: operator, which is represented by if-then-else in the IR),
 * because
temporary variables, which are used for passing values between
 * code blocks, are not lowered, resulting in 32-bit phis in NIR.
 *
 * First change the variable types to 16 bits, then change all ir_dereference
 * types to 16 bits.
 */
class lower_variables_visitor : public ir_rvalue_enter_visitor {
public:
   lower_variables_visitor(const struct gl_shader_compiler_options *options)
      : options(options) {
      lower_vars = _mesa_pointer_set_create(NULL);
   }

   virtual ~lower_variables_visitor()
   {
      _mesa_set_destroy(lower_vars, NULL);
   }

   virtual ir_visitor_status visit(ir_variable *var);
   virtual ir_visitor_status visit_enter(ir_assignment *ir);
   virtual ir_visitor_status visit_enter(ir_return *ir);
   virtual ir_visitor_status visit_enter(ir_call *ir);
   virtual void handle_rvalue(ir_rvalue **rvalue);

   void fix_types_in_deref_chain(ir_dereference *ir);
   void convert_split_assignment(ir_dereference *lhs, ir_rvalue *rhs,
                                 bool insert_before);

   const struct gl_shader_compiler_options *options;
   /* Set of variables whose type was changed to 16 bits by visit(). */
   set *lower_vars;
};

/**
 * Convert a constant in place to the 16-bit variant of its type, recursing
 * into the elements of array constants.
 */
static void
lower_constant(ir_constant *ir)
{
   if (ir->type->is_array()) {
      /* Lower every element first, then the array type itself. */
      for (int i = 0; i < ir->type->array_size(); i++)
         lower_constant(ir->get_array_element(i));

      ir->type = lower_glsl_type(ir->type);
      return;
   }

   ir->type = lower_glsl_type(ir->type);
   ir_constant_data value;

   /* The type was already lowered above, so dispatch on the 16-bit base type
    * while still reading the source data through the 32-bit union members.
    */
   if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
      for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
         value.f16[i] = _mesa_float_to_half(ir->value.f[i]);
   } else if (ir->type->base_type == GLSL_TYPE_INT16) {
      for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
         value.i16[i] = ir->value.i[i];
   } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
      for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
         value.u16[i] = ir->value.u[i];
   } else {
      unreachable("invalid type");
   }

   ir->value = value;
}

/**
 * Lower the type of a temporary/auto variable that is 32-bit, has mediump or
 * lowp precision and a lowerable type, and remember it in lower_vars.
 */
ir_visitor_status
lower_variables_visitor::visit(ir_variable *var)
{
   if ((var->data.mode != ir_var_temporary &&
        var->data.mode != ir_var_auto) ||
       !var->type->without_array()->is_32bit() ||
       (var->data.precision != GLSL_PRECISION_MEDIUM &&
        var->data.precision != GLSL_PRECISION_LOW) ||
       !can_lower_type(options, var->type))
      return visit_continue;

   /* Lower constant initializers. */
   /* Each constant is cloned before being lowered in place. */
   if (var->constant_value &&
       var->type == var->constant_value->type) {
      var->constant_value =
         var->constant_value->clone(ralloc_parent(var), NULL);
      lower_constant(var->constant_value);
   }

   if (var->constant_initializer &&
       var->type == var->constant_initializer->type) {
      var->constant_initializer =
         var->constant_initializer->clone(ralloc_parent(var), NULL);
      lower_constant(var->constant_initializer);
   }

   var->type = lower_glsl_type(var->type);
   _mesa_set_add(lower_vars, var);

   return visit_continue;
}

/**
 * Rewrite the type of a dereference of a lowered variable, and of every
 * array dereference nested beneath it, to the 16-bit variant.
 *
 * The dereference must still be 32-bit and must reference a variable in
 * lower_vars (both are asserted).
 */
void
lower_variables_visitor::fix_types_in_deref_chain(ir_dereference *ir)
{
   assert(ir->type->without_array()->is_32bit());
   assert(_mesa_set_search(lower_vars, ir->variable_referenced()));

   /* Fix the type in the dereference node. */
   ir->type = lower_glsl_type(ir->type);

   /* If it's an array, fix the types in the whole dereference chain. */
   for (ir_dereference_array *deref_array = ir->as_dereference_array();
        deref_array;
        deref_array = deref_array->array->as_dereference_array()) {
      assert(deref_array->array->type->without_array()->is_32bit());
      deref_array->array->type = lower_glsl_type(deref_array->array->type);
   }
}

/**
 * Emit "lhs = convert(rhs)" next to base_ir, where exactly one side is
 * 16-bit. Arrays are split into one assignment per element, recursively.
 *
 * \param insert_before  insert the new assignment before base_ir when true,
 *                       after it otherwise
 */
void
lower_variables_visitor::convert_split_assignment(ir_dereference *lhs,
                                                  ir_rvalue *rhs,
                                                  bool insert_before)
{
   void *mem_ctx = ralloc_parent(lhs);

   if (lhs->type->is_array()) {
      for (unsigned i = 0; i < lhs->type->length; i++) {
         ir_dereference *l, *r;

         /* Both sides are cloned for every element. */
         l = new(mem_ctx) ir_dereference_array(lhs->clone(mem_ctx, NULL),
                                               new(mem_ctx) ir_constant(i));
         r = new(mem_ctx) ir_dereference_array(rhs->clone(mem_ctx, NULL),
                                               new(mem_ctx) ir_constant(i));
         convert_split_assignment(l, r, insert_before);
      }
      return;
   }

   assert(lhs->type->is_16bit() || lhs->type->is_32bit());
   assert(rhs->type->is_16bit() || rhs->type->is_32bit());
   assert(lhs->type->is_16bit() != rhs->type->is_16bit());

   /* A 32-bit LHS needs an up conversion of the RHS, a 16-bit LHS a down
    * conversion.
    */
   ir_assignment *assign =
      new(mem_ctx) ir_assignment(lhs, convert_precision(lhs->type->is_32bit(), rhs));

   if (insert_before)
      base_ir->insert_before(assign);
   else
      base_ir->insert_after(assign);
}

/**
 * Make the two sides of an assignment agree on bit size after variables were
 * lowered: mixed-size array assignments are replaced by per-element
 * converting assignments, and other assignments to lowered variables get
 * their dereference types fixed and the RHS converted down if needed.
 */
ir_visitor_status
lower_variables_visitor::visit_enter(ir_assignment *ir)
{
   ir_dereference *lhs = ir->lhs;
   ir_variable *var = lhs->variable_referenced();
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   ir_variable *rhs_var = rhs_deref ? rhs_deref->variable_referenced() : NULL;
   ir_constant *rhs_const = ir->rhs->as_constant();

   /* Legalize array assignments between lowered and non-lowered variables. */
   if (lhs->type->is_array() &&
       (rhs_var || rhs_const) &&
       (!rhs_var ||
        var->type->without_array()->is_16bit() !=
        rhs_var->type->without_array()->is_16bit()) &&
       (!rhs_const ||
        (var->type->without_array()->is_16bit() &&
         rhs_const->type->without_array()->is_32bit()))) {
      assert(ir->rhs->type->is_array());

      /* Fix array assignments from lowered to non-lowered. */
      if (rhs_var && _mesa_set_search(lower_vars, rhs_var)) {
         fix_types_in_deref_chain(rhs_deref);
         /* Convert to 32 bits for LHS. */
         convert_split_assignment(lhs, rhs_deref, true);
         ir->remove();
         return visit_continue;
      }

      /* Fix array assignments from non-lowered to lowered. */
      if (_mesa_set_search(lower_vars, var) &&
          ir->rhs->type->without_array()->is_32bit()) {
         fix_types_in_deref_chain(lhs);
         /* Convert to 16 bits for LHS. */
         convert_split_assignment(lhs, ir->rhs, true);
         ir->remove();
         return visit_continue;
      }
   }

   /* Fix assignment types. */
   if (_mesa_set_search(lower_vars, var)) {
      /* Fix the LHS type. */
      if (lhs->type->without_array()->is_32bit())
         fix_types_in_deref_chain(lhs);

      /* Fix the RHS type if it's a lowered variable. */
      if (rhs_var &&
          _mesa_set_search(lower_vars, rhs_var) &&
          rhs_deref->type->without_array()->is_32bit())
         fix_types_in_deref_chain(rhs_deref);

      /* Fix the RHS type if it's a non-array expression. */
      if (ir->rhs->type->is_32bit()) {
         ir_expression *expr = ir->rhs->as_expression();

         /* Convert the RHS to the LHS type. */
         if (expr &&
             (expr->operation == ir_unop_f162f ||
              expr->operation == ir_unop_i2i ||
              expr->operation == ir_unop_u2u) &&
             expr->operands[0]->type->is_16bit()) {
            /* If there is an "up" conversion, just remove it.
             * This is optional. We could as well execute the else statement and
             * let NIR eliminate the up+down conversions.
             */
            ir->rhs = expr->operands[0];
         } else {
            /* Add a "down" conversion operation to fix the type of RHS. */
            ir->rhs = convert_precision(false, ir->rhs);
         }
      }
   }

   return ir_rvalue_enter_visitor::visit_enter(ir);
}

/**
 * When a return statement returns a dereference of a lowered variable,
 * route the value through a new 32-bit temporary so the returned value keeps
 * its original type.
 */
ir_visitor_status
lower_variables_visitor::visit_enter(ir_return *ir)
{
   void *mem_ctx = ralloc_parent(ir);

   ir_dereference *deref = ir->value ? ir->value->as_dereference() : NULL;
   if (deref) {
      ir_variable *var = deref->variable_referenced();

      /* Fix the type of the return value. */
      if (_mesa_set_search(lower_vars, var) &&
          deref->type->without_array()->is_32bit()) {
         /* Create a 32-bit temporary variable. */
         /* The temporary uses deref->type before that type is lowered below. */
         ir_variable *new_var =
            new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary);
         base_ir->insert_before(new_var);

         /* Fix types in dereferences. */
         fix_types_in_deref_chain(deref);

         /* Convert to 32 bits for the return value. */
         convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
                                  deref, true);
         ir->value = new(mem_ctx) ir_dereference_variable(new_var);
      }
   }

   return ir_rvalue_enter_visitor::visit_enter(ir);
}

/**
 * Fix rvalues that read lowered variables: a down conversion applied directly
 * to such a dereference is dropped, and any other 32-bit dereference of a
 * lowered variable is retyped and wrapped in an up conversion.
 * Rvalues inside an assignee are left alone.
 */
void
lower_variables_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   ir_rvalue *ir = *rvalue;

   if (in_assignee || ir == NULL)
      return;

   ir_expression *expr = ir->as_expression();
   ir_dereference *expr_op0_deref = expr ? expr->operands[0]->as_dereference() : NULL;

   /* Remove f2fmp(float16). Same for int16 and uint16. */
   if (expr &&
       expr_op0_deref &&
       (expr->operation == ir_unop_f2fmp ||
        expr->operation == ir_unop_i2imp ||
        expr->operation == ir_unop_u2ump ||
        expr->operation == ir_unop_f2f16 ||
        expr->operation == ir_unop_i2i ||
        expr->operation == ir_unop_u2u) &&
       expr->type->without_array()->is_16bit() &&
       expr_op0_deref->type->without_array()->is_32bit() &&
       _mesa_set_search(lower_vars, expr_op0_deref->variable_referenced())) {
      fix_types_in_deref_chain(expr_op0_deref);

      /* Remove f2fmp/i2imp/u2ump. */
      *rvalue = expr_op0_deref;
      return;
   }

   ir_dereference *deref = ir->as_dereference();

   if (deref) {
      ir_variable *var = deref->variable_referenced();
      assert(var);

      if (_mesa_set_search(lower_vars, var) &&
          deref->type->without_array()->is_32bit()) {
         fix_types_in_deref_chain(deref);

         /* Then convert the type up. Optimizations should eliminate this. */
         *rvalue = convert_precision(true, deref);
      }
   }
}

ir_visitor_status
lower_variables_visitor::visit_enter(ir_call *ir)
{
   void *mem_ctx = ralloc_parent(ir);

   /* We can't pass 16-bit variables as 32-bit inout/out parameters. */
   foreach_two_lists(formal_node, &ir->callee->parameters,
                     actual_node, &ir->actual_parameters) {
      ir_dereference *param_deref =
         ((ir_rvalue *)actual_node)->as_dereference();
      ir_variable *param = (ir_variable *)formal_node;

      if (!param_deref)
         continue;

      ir_variable *var = param_deref->variable_referenced();

      if (_mesa_set_search(lower_vars, var) &&
          param->type->without_array()->is_32bit()) {
         fix_types_in_deref_chain(param_deref);

         /* Create a 32-bit temporary variable for the parameter. */
         ir_variable *new_var =
            new(mem_ctx) ir_variable(param->type, "lowerp", ir_var_temporary);
         base_ir->insert_before(new_var);

         /* Replace the parameter. */
         actual_node->replace_with(new(mem_ctx) ir_dereference_variable(new_var));

         if (param->data.mode == ir_var_function_in ||
             param->data.mode == ir_var_function_inout) {
            /* Convert to 32 bits for passing in.
*/ convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var), param_deref->clone(mem_ctx, NULL), true); } if (param->data.mode == ir_var_function_out || param->data.mode == ir_var_function_inout) { /* Convert to 16 bits after returning. */ convert_split_assignment(param_deref, new(mem_ctx) ir_dereference_variable(new_var), false); } } } /* Fix the type of return value dereferencies. */ ir_dereference_variable *ret_deref = ir->return_deref; ir_variable *ret_var = ret_deref ? ret_deref->variable_referenced() : NULL; if (ret_var && _mesa_set_search(lower_vars, ret_var) && ret_deref->type->without_array()->is_32bit()) { /* Create a 32-bit temporary variable. */ ir_variable *new_var = new(mem_ctx) ir_variable(ir->callee->return_type, "lowerp", ir_var_temporary); base_ir->insert_before(new_var); /* Replace the return variable. */ ret_deref->var = new_var; /* Convert to 16 bits after returning. */ convert_split_assignment(new(mem_ctx) ir_dereference_variable(ret_var), new(mem_ctx) ir_dereference_variable(new_var), false); } return ir_rvalue_enter_visitor::visit_enter(ir); } } void lower_precision(const struct gl_shader_compiler_options *options, exec_list *instructions) { find_precision_visitor v(options); find_lowerable_rvalues(options, instructions, v.lowerable_rvalues); visit_list_elements(&v, instructions); if (options->LowerPrecisionTemporaries) { lower_variables_visitor vars(options); visit_list_elements(&vars, instructions); } }