| author | Jason Ekstrand <[email protected]> | 2015-10-19 11:15:32 -0700 |
|---|---|---|
| committer | Jason Ekstrand <[email protected]> | 2015-10-19 14:14:21 -0700 |
| commit | 958fc04dc51a2561c8598f42df59e3d9139e56a7 (patch) | |
| tree | b6acf05aa073e97ae8e58647bf05c2c3e816f041 | /src/glsl |
| parent | 995d9c4ac7fb046e01196cec308ebe10002a28da (diff) | |
| parent | de862f03accb12b044ced60cb98f47a055457223 (diff) | |
Merge remote-tracking branch 'mesa-public/master' into vulkan
Diffstat (limited to 'src/glsl')
49 files changed, 2274 insertions, 763 deletions
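
Much of what this merge pulls in is the GLSL front-end work for arrays of arrays: inner dimensions may now be left unsized when a constructor or initializer supplies the size, and sequence-operator expressions are rejected as constant expressions in GLSL 4.30 / GLSL ES 3.00. As orientation before the diff, here is an illustrative GLSL sketch assembled from the examples quoted in the patch comments below (it is not shader code from the commit itself):

```glsl
#version 430

// An unsized inner dimension is allowed when the constructor sizes it
// (see process_array_constructor in ast_function.cpp below). All
// parameters must agree on the inner size; 'a' ends up as vec4[2][2].
vec4[][] a = vec4[][](vec4[](vec4(0.0), vec4(1.0)),
                      vec4[](vec4(2.0), vec4(3.0)));

// Rejected: the second parameter has three elements, the others two.
// vec4[][] b = vec4[][](vec4[](vec4(0.0), vec4(1.0)),
//                       vec4[](vec4(0.0), vec4(1.0), vec4(1.0)));

// Rejected: the sequence operator is not a constant expression, so it
// cannot size an array (see has_sequence_subexpression in
// ast_to_hir.cpp below). This is the spec's own example.
// float c[2, 3];
```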
diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am index 08368311b8a..8b0a73b250a 100644 --- a/src/glsl/Makefile.am +++ b/src/glsl/Makefile.am @@ -148,9 +148,6 @@ libglsl_la_SOURCES = \ libnir_la_SOURCES = \ - glsl_types.cpp \ - builtin_types.cpp \ - glsl_symbol_table.cpp \ $(NIR_FILES) \ $(NIR_GENERATED_FILES) @@ -160,6 +157,7 @@ glsl_compiler_SOURCES = \ glsl_compiler_LDADD = \ libglsl.la \ $(top_builddir)/src/libglsl_util.la \ + $(top_builddir)/src/util/libmesautil.la \ $(PTHREAD_LIBS) spirv2nir_SOURCES = \ @@ -284,6 +282,5 @@ nir_tests_control_flow_tests_CFLAGS = \ nir_tests_control_flow_tests_LDADD = \ $(top_builddir)/src/gtest/libgtest.la \ $(top_builddir)/src/glsl/libnir.la \ - $(top_builddir)/src/libglsl_util.la \ $(top_builddir)/src/util/libmesautil.la \ $(PTHREAD_LIBS) diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 65a26268c2e..47dc628101d 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -20,6 +20,8 @@ NIR_GENERATED_FILES = \ NIR_FILES = \ nir/glsl_to_nir.cpp \ nir/glsl_to_nir.h \ + nir/glsl_types.cpp \ + nir/glsl_types.h \ nir/nir.c \ nir/nir.h \ nir/nir_array.h \ @@ -33,6 +35,8 @@ NIR_FILES = \ nir/nir_gs_count_vertices.c \ nir/nir_intrinsics.c \ nir/nir_intrinsics.h \ + nir/nir_instr_set.c \ + nir/nir_instr_set.h \ nir/nir_live_variables.c \ nir/nir_lower_alu_to_scalar.c \ nir/nir_lower_atomics.c \ @@ -81,6 +85,8 @@ NIR_FILES = \ nir/nir_worklist.c \ nir/nir_worklist.h \ nir/nir_types.cpp \ + nir/shader_enums.h \ + nir/shader_enums.c \ nir/spirv_to_nir.c \ nir/spirv_glsl450_to_nir.c @@ -103,8 +109,6 @@ LIBGLSL_FILES = \ glsl_parser_extras.h \ glsl_symbol_table.cpp \ glsl_symbol_table.h \ - glsl_types.cpp \ - glsl_types.h \ hir_field_selection.cpp \ ir_basic_block.cpp \ ir_basic_block.h \ @@ -206,8 +210,7 @@ LIBGLSL_FILES = \ opt_vectorize.cpp \ program.h \ s_expression.cpp \ - s_expression.h \ - shader_enums.h + s_expression.h # glsl_compiler diff --git a/src/glsl/SConscript b/src/glsl/SConscript index 89c603580a5..70bf5b09c3c 100644 --- a/src/glsl/SConscript +++ b/src/glsl/SConscript @@ -16,6 +16,7 @@ env.Prepend(CPPPATH = [ '#src/gallium/include', '#src/gallium/auxiliary', '#src/glsl', + '#src/glsl/nir', '#src/glsl/glcpp', ]) @@ -60,6 +61,12 @@ source_lists = env.ParseSourceList('Makefile.sources') for l in ('LIBGLCPP_FILES', 'LIBGLSL_FILES'): glsl_sources += source_lists[l] +# add nir/glsl_types.cpp manually, because SCons still doesn't know about NIR. +# XXX: Remove this once we build NIR and NIR_FILES. 
+glsl_sources += [ + 'nir/glsl_types.cpp', +] + if env['msvc']: env.Prepend(CPPPATH = ['#/src/getopt']) env.PrependUnique(LIBS = [getopt]) diff --git a/src/glsl/ast.h b/src/glsl/ast.h index 4c314366133..e803e6d7675 100644 --- a/src/glsl/ast.h +++ b/src/glsl/ast.h @@ -62,6 +62,8 @@ public: virtual ir_rvalue *hir(exec_list *instructions, struct _mesa_glsl_parse_state *state); + virtual bool has_sequence_subexpression() const; + /** * Retrieve the source location of an AST node * @@ -181,6 +183,7 @@ enum ast_operators { ast_post_dec, ast_field_selection, ast_array_index, + ast_unsized_array_dim, ast_function_call, @@ -221,6 +224,8 @@ public: virtual void hir_no_rvalue(exec_list *instructions, struct _mesa_glsl_parse_state *state); + virtual bool has_sequence_subexpression() const; + ir_rvalue *do_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state, bool needs_rvalue); @@ -299,6 +304,8 @@ public: virtual void hir_no_rvalue(exec_list *instructions, struct _mesa_glsl_parse_state *state); + virtual bool has_sequence_subexpression() const; + private: /** * Is this function call actually a constructor? @@ -318,16 +325,7 @@ public: class ast_array_specifier : public ast_node { public: - /** Unsized array specifier ([]) */ - explicit ast_array_specifier(const struct YYLTYPE &locp) - : is_unsized_array(true) - { - set_location(locp); - } - - /** Sized array specifier ([dim]) */ ast_array_specifier(const struct YYLTYPE &locp, ast_expression *dim) - : is_unsized_array(false) { set_location(locp); array_dimensions.push_tail(&dim->link); @@ -338,13 +336,16 @@ public: array_dimensions.push_tail(&dim->link); } - virtual void print(void) const; + const bool is_single_dimension() + { + return this->array_dimensions.tail_pred->prev != NULL && + this->array_dimensions.tail_pred->prev->is_head_sentinel(); + } - /* If true, this means that the array has an unsized outermost dimension. */ - bool is_unsized_array; + virtual void print(void) const; /* This list contains objects of type ast_node containing the - * sized dimensions only, in outermost-to-innermost order. + * array dimensions in outermost-to-innermost order. */ exec_list array_dimensions; }; diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp index 5e8f49d70b0..74d403fdb65 100644 --- a/src/glsl/ast_array_index.cpp +++ b/src/glsl/ast_array_index.cpp @@ -28,13 +28,10 @@ void ast_array_specifier::print(void) const { - if (this->is_unsized_array) { - printf("[ ] "); - } - foreach_list_typed (ast_node, array_dimension, link, &this->array_dimensions) { printf("[ "); - array_dimension->print(); + if (((ast_expression*)array_dimension)->oper != ast_unsized_array_dim) + array_dimension->print(); printf("] "); } } @@ -64,21 +61,29 @@ update_max_array_access(ir_rvalue *ir, int idx, YYLTYPE *loc, } } else if (ir_dereference_record *deref_record = ir->as_dereference_record()) { - /* There are two possibilities we need to consider: + /* There are three possibilities we need to consider: * * - Accessing an element of an array that is a member of a named * interface block (e.g. ifc.foo[i]) * * - Accessing an element of an array that is a member of a named * interface block array (e.g. ifc[j].foo[i]). + * + * - Accessing an element of an array that is a member of a named + * interface block array of arrays (e.g. ifc[j][k].foo[i]). 
*/ ir_dereference_variable *deref_var = deref_record->record->as_dereference_variable(); if (deref_var == NULL) { - if (ir_dereference_array *deref_array = - deref_record->record->as_dereference_array()) { - deref_var = deref_array->array->as_dereference_variable(); + ir_dereference_array *deref_array = + deref_record->record->as_dereference_array(); + ir_dereference_array *deref_array_prev = NULL; + while (deref_array != NULL) { + deref_array_prev = deref_array; + deref_array = deref_array->array->as_dereference_array(); } + if (deref_array_prev != NULL) + deref_var = deref_array_prev->array->as_dereference_variable(); } if (deref_var != NULL) { @@ -230,7 +235,7 @@ _mesa_ast_array_index_to_hir(void *mem_ctx, ir_var_shader_storage) { _mesa_glsl_error(&loc, state, "unsized array index must be constant"); } - } else if (array->type->fields.array->is_interface() + } else if (array->type->without_array()->is_interface() && (array->variable_referenced()->data.mode == ir_var_uniform || array->variable_referenced()->data.mode == ir_var_shader_storage) && !state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) { diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp index 26d4c62ce36..c5c5cae333b 100644 --- a/src/glsl/ast_function.cpp +++ b/src/glsl/ast_function.cpp @@ -437,13 +437,54 @@ generate_call(exec_list *instructions, ir_function_signature *sig, } } - /* If the function call is a constant expression, don't generate any - * instructions; just generate an ir_constant. + /* Section 4.3.2 (Const) of the GLSL 1.10.59 spec says: + * + * "Initializers for const declarations must be formed from literal + * values, other const variables (not including function call + * paramaters), or expressions of these. + * + * Constructors may be used in such expressions, but function calls may + * not." + * + * Section 4.3.3 (Constant Expressions) of the GLSL 1.20.8 spec says: + * + * "A constant expression is one of + * + * ... + * + * - a built-in function call whose arguments are all constant + * expressions, with the exception of the texture lookup + * functions, the noise functions, and ftransform. The built-in + * functions dFdx, dFdy, and fwidth must return 0 when evaluated + * inside an initializer with an argument that is a constant + * expression." + * + * Section 5.10 (Constant Expressions) of the GLSL ES 1.00.17 spec says: + * + * "A constant expression is one of * - * Function calls were first allowed to be constant expressions in GLSL - * 1.20 and GLSL ES 3.00. + * ... + * + * - a built-in function call whose arguments are all constant + * expressions, with the exception of the texture lookup + * functions." + * + * Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec says: + * + * "A constant expression is one of + * + * ... + * + * - a built-in function call whose arguments are all constant + * expressions, with the exception of the texture lookup + * functions. The built-in functions dFdx, dFdy, and fwidth must + * return 0 when evaluated inside an initializer with an argument + * that is a constant expression." + * + * If the function call is a constant expression, don't generate any + * instructions; just generate an ir_constant. 
*/ - if (state->is_version(120, 300)) { + if (state->is_version(120, 100)) { ir_constant *value = sig->constant_expression_value(actual_parameters, NULL); if (value != NULL) { return value; @@ -950,6 +991,7 @@ process_array_constructor(exec_list *instructions, } bool all_parameters_are_constant = true; + const glsl_type *element_type = constructor_type->fields.array; /* Type cast each parameter and, if possible, fold constants. */ foreach_in_list_safe(ir_rvalue, ir, &actual_parameters) { @@ -976,12 +1018,34 @@ process_array_constructor(exec_list *instructions, } } - if (result->type != constructor_type->fields.array) { + if (constructor_type->fields.array->is_unsized_array()) { + /* As the inner parameters of the constructor are created without + * knowledge of each other we need to check to make sure unsized + * parameters of unsized constructors all end up with the same size. + * + * e.g we make sure to fail for a constructor like this: + * vec4[][] a = vec4[][](vec4[](vec4(0.0), vec4(1.0)), + * vec4[](vec4(0.0), vec4(1.0), vec4(1.0)), + * vec4[](vec4(0.0), vec4(1.0))); + */ + if (element_type->is_unsized_array()) { + /* This is the first parameter so just get the type */ + element_type = result->type; + } else if (element_type != result->type) { + _mesa_glsl_error(loc, state, "type error in array constructor: " + "expected: %s, found %s", + element_type->name, + result->type->name); + return ir_rvalue::error_value(ctx); + } + } else if (result->type != constructor_type->fields.array) { _mesa_glsl_error(loc, state, "type error in array constructor: " "expected: %s, found %s", constructor_type->fields.array->name, result->type->name); return ir_rvalue::error_value(ctx); + } else { + element_type = result->type; } /* Attempt to convert the parameter to a constant valued expression. @@ -998,6 +1062,14 @@ process_array_constructor(exec_list *instructions, ir->replace_with(result); } + if (constructor_type->fields.array->is_unsized_array()) { + constructor_type = + glsl_type::get_array_instance(element_type, + parameter_count); + assert(constructor_type != NULL); + assert(constructor_type->length == parameter_count); + } + if (all_parameters_are_constant) return new(ctx) ir_constant(constructor_type, &actual_parameters); @@ -1958,6 +2030,17 @@ ast_function_expression::hir(exec_list *instructions, unreachable("not reached"); } +bool +ast_function_expression::has_sequence_subexpression() const +{ + foreach_list_typed(const ast_node, ast, link, &this->expressions) { + if (ast->has_sequence_subexpression()) + return true; + } + + return false; +} + ir_rvalue * ast_aggregate_initializer::hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index f38ca84d129..0c11ec58d20 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -782,8 +782,30 @@ validate_assignment(struct _mesa_glsl_parse_state *state, * Note: Whole-array assignments are not permitted in GLSL 1.10, but this * is handled by ir_dereference::is_lvalue. 
*/ - if (lhs->type->is_unsized_array() && rhs->type->is_array() - && (lhs->type->fields.array == rhs->type->fields.array)) { + const glsl_type *lhs_t = lhs->type; + const glsl_type *rhs_t = rhs->type; + bool unsized_array = false; + while(lhs_t->is_array()) { + if (rhs_t == lhs_t) + break; /* the rest of the inner arrays match so break out early */ + if (!rhs_t->is_array()) { + unsized_array = false; + break; /* number of dimensions mismatch */ + } + if (lhs_t->length == rhs_t->length) { + lhs_t = lhs_t->fields.array; + rhs_t = rhs_t->fields.array; + continue; + } else if (lhs_t->is_unsized_array()) { + unsized_array = true; + } else { + unsized_array = false; + break; /* sized array mismatch */ + } + lhs_t = lhs_t->fields.array; + rhs_t = rhs_t->fields.array; + } + if (unsized_array) { if (is_initializer) { return rhs; } else { @@ -1004,6 +1026,12 @@ ast_node::hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) return NULL; } +bool +ast_node::has_sequence_subexpression() const +{ + return false; +} + void ast_function_expression::hir_no_rvalue(exec_list *instructions, struct _mesa_glsl_parse_state *state) @@ -1805,6 +1833,10 @@ ast_expression::do_hir(exec_list *instructions, break; } + case ast_unsized_array_dim: + assert(!"ast_unsized_array_dim: Should never get here."); + break; + case ast_function_call: /* Should *NEVER* get here. ast_function_call should always be handled * by ast_function_expression::hir. @@ -1916,6 +1948,83 @@ ast_expression::do_hir(exec_list *instructions, return result; } +bool +ast_expression::has_sequence_subexpression() const +{ + switch (this->oper) { + case ast_plus: + case ast_neg: + case ast_bit_not: + case ast_logic_not: + case ast_pre_inc: + case ast_pre_dec: + case ast_post_inc: + case ast_post_dec: + return this->subexpressions[0]->has_sequence_subexpression(); + + case ast_assign: + case ast_add: + case ast_sub: + case ast_mul: + case ast_div: + case ast_mod: + case ast_lshift: + case ast_rshift: + case ast_less: + case ast_greater: + case ast_lequal: + case ast_gequal: + case ast_nequal: + case ast_equal: + case ast_bit_and: + case ast_bit_xor: + case ast_bit_or: + case ast_logic_and: + case ast_logic_or: + case ast_logic_xor: + case ast_array_index: + case ast_mul_assign: + case ast_div_assign: + case ast_add_assign: + case ast_sub_assign: + case ast_mod_assign: + case ast_ls_assign: + case ast_rs_assign: + case ast_and_assign: + case ast_xor_assign: + case ast_or_assign: + return this->subexpressions[0]->has_sequence_subexpression() || + this->subexpressions[1]->has_sequence_subexpression(); + + case ast_conditional: + return this->subexpressions[0]->has_sequence_subexpression() || + this->subexpressions[1]->has_sequence_subexpression() || + this->subexpressions[2]->has_sequence_subexpression(); + + case ast_sequence: + return true; + + case ast_field_selection: + case ast_identifier: + case ast_int_constant: + case ast_uint_constant: + case ast_float_constant: + case ast_bool_constant: + case ast_double_constant: + return false; + + case ast_aggregate: + unreachable("ast_aggregate: Should never get here."); + + case ast_function_call: + unreachable("should be handled by ast_function_expression::hir"); + + case ast_unsized_array_dim: + unreachable("ast_unsized_array_dim: Should never get here."); + } + + return false; +} ir_rvalue * ast_expression_statement::hir(exec_list *instructions, @@ -1968,6 +2077,14 @@ process_array_size(exec_node *node, exec_list dummy_instructions; ast_node *array_size = exec_node_data(ast_node, node, link); 
+ + /** + * Dimensions other than the outermost dimension can by unsized if they + * are immediately sized by a constructor or initializer. + */ + if (((ast_expression*)array_size)->oper == ast_unsized_array_dim) + return 0; + ir_rvalue *const ir = array_size->hir(& dummy_instructions, state); YYLTYPE loc = array_size->get_location(); @@ -1990,7 +2107,7 @@ process_array_size(exec_node *node, } ir_constant *const size = ir->constant_expression_value(); - if (size == NULL) { + if (size == NULL || array_size->has_sequence_subexpression()) { _mesa_glsl_error(& loc, state, "array size must be a " "constant valued expression"); return 0; @@ -2028,20 +2145,7 @@ process_array_type(YYLTYPE *loc, const glsl_type *base, * * "Only one-dimensional arrays may be declared." */ - if (!state->ARB_arrays_of_arrays_enable) { - _mesa_glsl_error(loc, state, - "invalid array of `%s'" - "GL_ARB_arrays_of_arrays " - "required for defining arrays of arrays", - base->name); - return glsl_type::error_type; - } - - if (base->length == 0) { - _mesa_glsl_error(loc, state, - "only the outermost array dimension can " - "be unsized", - base->name); + if (!state->check_arrays_of_arrays_allowed(loc)) { return glsl_type::error_type; } } @@ -2051,9 +2155,6 @@ process_array_type(YYLTYPE *loc, const glsl_type *base, unsigned array_size = process_array_size(node, state); array_type = glsl_type::get_array_instance(array_type, array_size); } - - if (array_specifier->is_unsized_array) - array_type = glsl_type::get_array_instance(array_type, 0); } return array_type; @@ -2592,6 +2693,25 @@ is_conflicting_fragcoord_redeclaration(struct _mesa_glsl_parse_state *state, return false; } +static inline void +validate_array_dimensions(const glsl_type *t, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) { + if (t->is_array()) { + t = t->fields.array; + while (t->is_array()) { + if (t->is_unsized_array()) { + _mesa_glsl_error(loc, state, + "only the outermost array dimension can " + "be unsized", + t->name); + break; + } + t = t->fields.array; + } + } +} + static void apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, ir_variable *var, @@ -3171,7 +3291,8 @@ process_initializer(ir_variable *var, ast_declaration *decl, */ if (var->data.mode == ir_var_uniform) { state->check_version(120, 0, &initializer_loc, - "cannot initialize uniforms"); + "cannot initialize uniform %s", + var->name); } /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec: @@ -3179,8 +3300,9 @@ process_initializer(ir_variable *var, ast_declaration *decl, * "Buffer variables cannot have initializers." */ if (var->data.mode == ir_var_shader_storage) { - _mesa_glsl_error(& initializer_loc, state, - "SSBO variables cannot have initializers"); + _mesa_glsl_error(&initializer_loc, state, + "cannot initialize buffer variable %s", + var->name); } /* From section 4.1.7 of the GLSL 4.40 spec: @@ -3190,16 +3312,25 @@ process_initializer(ir_variable *var, ast_declaration *decl, * shader." */ if (var->type->contains_opaque()) { - _mesa_glsl_error(& initializer_loc, state, - "cannot initialize opaque variable"); + _mesa_glsl_error(&initializer_loc, state, + "cannot initialize opaque variable %s", + var->name); } if ((var->data.mode == ir_var_shader_in) && (state->current_function == NULL)) { - _mesa_glsl_error(& initializer_loc, state, - "cannot initialize %s shader input / %s", - _mesa_shader_stage_to_string(state->stage), - (state->stage == MESA_SHADER_VERTEX) - ? 
"attribute" : "varying"); + _mesa_glsl_error(&initializer_loc, state, + "cannot initialize %s shader input / %s %s", + _mesa_shader_stage_to_string(state->stage), + (state->stage == MESA_SHADER_VERTEX) + ? "attribute" : "varying", + var->name); + } + + if (var->data.mode == ir_var_shader_out && state->current_function == NULL) { + _mesa_glsl_error(&initializer_loc, state, + "cannot initialize %s shader output %s", + _mesa_shader_stage_to_string(state->stage), + var->name); } /* If the initializer is an ast_aggregate_initializer, recursively store @@ -3214,16 +3345,72 @@ process_initializer(ir_variable *var, ast_declaration *decl, /* Calculate the constant value if this is a const or uniform * declaration. + * + * Section 4.3 (Storage Qualifiers) of the GLSL ES 1.00.17 spec says: + * + * "Declarations of globals without a storage qualifier, or with + * just the const qualifier, may include initializers, in which case + * they will be initialized before the first line of main() is + * executed. Such initializers must be a constant expression." + * + * The same section of the GLSL ES 3.00.4 spec has similar language. */ if (type->qualifier.flags.q.constant - || type->qualifier.flags.q.uniform) { + || type->qualifier.flags.q.uniform + || (state->es_shader && state->current_function == NULL)) { ir_rvalue *new_rhs = validate_assignment(state, initializer_loc, lhs, rhs, true); if (new_rhs != NULL) { rhs = new_rhs; + /* Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec + * says: + * + * "A constant expression is one of + * + * ... + * + * - an expression formed by an operator on operands that are + * all constant expressions, including getting an element of + * a constant array, or a field of a constant structure, or + * components of a constant vector. However, the sequence + * operator ( , ) and the assignment operators ( =, +=, ...) + * are not included in the operators that can create a + * constant expression." + * + * Section 12.43 (Sequence operator and constant expressions) says: + * + * "Should the following construct be allowed? + * + * float a[2,3]; + * + * The expression within the brackets uses the sequence operator + * (',') and returns the integer 3 so the construct is declaring + * a single-dimensional array of size 3. In some languages, the + * construct declares a two-dimensional array. It would be + * preferable to make this construct illegal to avoid confusion. + * + * One possibility is to change the definition of the sequence + * operator so that it does not return a constant-expression and + * hence cannot be used to declare an array size. + * + * RESOLUTION: The result of a sequence operator is not a + * constant-expression." + * + * Section 4.3.3 (Constant Expressions) of the GLSL 4.30.9 spec + * contains language almost identical to the section 4.3.3 in the + * GLSL ES 3.00.4 spec. This is a new limitation for these GLSL + * versions. + */ ir_constant *constant_value = rhs->constant_expression_value(); - if (!constant_value) { + if (!constant_value || + (state->is_version(430, 300) && + decl->initializer->has_sequence_subexpression())) { + const char *const variable_mode = + (type->qualifier.flags.q.constant) + ? "const" + : ((type->qualifier.flags.q.uniform) ? "uniform" : "global"); + /* If ARB_shading_language_420pack is enabled, initializers of * const-qualified local variables do not have to be constant * expressions. 
Const-qualified global variables must still be @@ -3234,22 +3421,24 @@ process_initializer(ir_variable *var, ast_declaration *decl, _mesa_glsl_error(& initializer_loc, state, "initializer of %s variable `%s' must be a " "constant expression", - (type->qualifier.flags.q.constant) - ? "const" : "uniform", + variable_mode, decl->identifier); if (var->type->is_numeric()) { /* Reduce cascading errors. */ - var->constant_value = ir_constant::zero(state, var->type); + var->constant_value = type->qualifier.flags.q.constant + ? ir_constant::zero(state, var->type) : NULL; } } } else { rhs = constant_value; - var->constant_value = constant_value; + var->constant_value = type->qualifier.flags.q.constant + ? constant_value : NULL; } } else { if (var->type->is_numeric()) { /* Reduce cascading errors. */ - var->constant_value = ir_constant::zero(state, var->type); + var->constant_value = type->qualifier.flags.q.constant + ? ir_constant::zero(state, var->type) : NULL; } } } @@ -4265,6 +4454,8 @@ ast_declarator_list::hir(exec_list *instructions, result = process_initializer((earlier == NULL) ? var : earlier, decl, this->type, &initializer_instructions, state); + } else { + validate_array_dimensions(var_type, state, &loc); } /* From page 23 (page 29 of the PDF) of the GLSL 1.10 spec: @@ -5790,6 +5981,7 @@ ast_process_structure_or_interface_block(exec_list *instructions, const struct glsl_type *field_type = process_array_type(&loc, decl_type, decl->array_specifier, state); + validate_array_dimensions(field_type, state, &loc); fields[i].type = field_type; fields[i].name = decl->identifier; fields[i].location = -1; @@ -6142,7 +6334,8 @@ ast_interface_block::hir(exec_list *instructions, _mesa_shader_stage_to_string(state->stage)); } if (this->instance_name == NULL || - strcmp(this->instance_name, "gl_in") != 0 || this->array_specifier == NULL) { + strcmp(this->instance_name, "gl_in") != 0 || this->array_specifier == NULL || + !this->array_specifier->is_single_dimension()) { _mesa_glsl_error(&loc, state, "gl_PerVertex input must be redeclared as " "gl_in[]"); @@ -6305,6 +6498,9 @@ ast_interface_block::hir(exec_list *instructions, ir_variable *var; if (this->array_specifier != NULL) { + const glsl_type *block_array_type = + process_array_type(&loc, block_type, this->array_specifier, state); + /* Section 4.3.7 (Interface Blocks) of the GLSL 1.50 spec says: * * For uniform blocks declared an array, each individual array @@ -6328,7 +6524,7 @@ ast_interface_block::hir(exec_list *instructions, * tessellation control shader output, and tessellation evaluation * shader input. 
*/ - if (this->array_specifier->is_unsized_array) { + if (block_array_type->is_unsized_array()) { bool allow_inputs = state->stage == MESA_SHADER_GEOMETRY || state->stage == MESA_SHADER_TESS_CTRL || state->stage == MESA_SHADER_TESS_EVAL; @@ -6355,9 +6551,6 @@ ast_interface_block::hir(exec_list *instructions, } } - const glsl_type *block_array_type = - process_array_type(&loc, block_type, this->array_specifier, state); - /* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec: * * * Arrays of arrays of blocks are not allowed diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp index f0f6be21b7d..aae25f893e8 100644 --- a/src/glsl/builtin_functions.cpp +++ b/src/glsl/builtin_functions.cpp @@ -403,7 +403,7 @@ shader_atomic_counters(const _mesa_glsl_parse_state *state) static bool shader_storage_buffer_object(const _mesa_glsl_parse_state *state) { - return state->ARB_shader_storage_buffer_object_enable; + return state->has_shader_storage_buffer_objects(); } static bool diff --git a/src/glsl/builtin_types.cpp b/src/glsl/builtin_types.cpp index 0aedbb3546a..bbdcd199e92 100644 --- a/src/glsl/builtin_types.cpp +++ b/src/glsl/builtin_types.cpp @@ -43,9 +43,7 @@ * convenience pointers (glsl_type::foo_type). * @{ */ -#define DECL_TYPE(NAME, ...) \ - const glsl_type glsl_type::_##NAME##_type = glsl_type(__VA_ARGS__, #NAME); \ - const glsl_type *const glsl_type::NAME##_type = &glsl_type::_##NAME##_type; +#define DECL_TYPE(NAME, ...) #define STRUCT_TYPE(NAME) \ const glsl_type glsl_type::_struct_##NAME##_type = \ diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index c1bcccc34f4..cd00f6e085b 100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -1962,7 +1962,9 @@ array_specifier: '[' ']' { void *ctx = state; - $$ = new(ctx) ast_array_specifier(@1); + $$ = new(ctx) ast_array_specifier(@1, new(ctx) ast_expression( + ast_unsized_array_dim, NULL, + NULL, NULL)); $$->set_location_range(@1, @2); } | '[' constant_expression ']' @@ -1973,29 +1975,21 @@ array_specifier: } | array_specifier '[' ']' { + void *ctx = state; $$ = $1; - if (!state->ARB_arrays_of_arrays_enable) { - _mesa_glsl_error(& @1, state, - "GL_ARB_arrays_of_arrays " - "required for defining arrays of arrays"); - } else { - _mesa_glsl_error(& @1, state, - "only the outermost array dimension can " - "be unsized"); + if (state->check_arrays_of_arrays_allowed(& @1)) { + $$->add_dimension(new(ctx) ast_expression(ast_unsized_array_dim, NULL, + NULL, NULL)); } } | array_specifier '[' constant_expression ']' { $$ = $1; - if (!state->ARB_arrays_of_arrays_enable) { - _mesa_glsl_error(& @1, state, - "GL_ARB_arrays_of_arrays " - "required for defining arrays of arrays"); + if (state->check_arrays_of_arrays_allowed(& @1)) { + $$->add_dimension($3); } - - $$->add_dimension($3); } ; diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index 7fee43ece52..e8740f9ecb9 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -115,6 +115,20 @@ struct _mesa_glsl_parse_state { unsigned required_glsl_es_version, YYLTYPE *locp, const char *fmt, ...) PRINTFLIKE(5, 6); + bool check_arrays_of_arrays_allowed(YYLTYPE *locp) + { + if (!(ARB_arrays_of_arrays_enable || is_version(430, 310))) { + const char *const requirement = this->es_shader + ? 
"GLSL ES 3.10" + : "GL_ARB_arrays_of_arrays or GLSL 4.30"; + _mesa_glsl_error(locp, this, + "%s required for defining arrays of arrays.", + requirement); + return false; + } + return true; + } + bool check_precision_qualifiers_allowed(YYLTYPE *locp) { return check_version(130, 100, locp, diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index 2c45b9edc0f..8933b230177 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -662,6 +662,22 @@ ir_expression::get_operator(const char *str) return (ir_expression_operation) -1; } +ir_variable * +ir_expression::variable_referenced() const +{ + switch (operation) { + case ir_binop_vector_extract: + case ir_triop_vector_insert: + /* We get these for things like a[0] where a is a vector type. In these + * cases we want variable_referenced() to return the actual vector + * variable this is wrapping. + */ + return operands[0]->variable_referenced(); + default: + return ir_rvalue::variable_referenced(); + } +} + ir_constant::ir_constant() : ir_rvalue(ir_type_constant) { @@ -1673,8 +1689,8 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name, if (type->is_interface()) this->init_interface_type(type); - else if (type->is_array() && type->fields.array->is_interface()) - this->init_interface_type(type->fields.array); + else if (type->without_array()->is_interface()) + this->init_interface_type(type->without_array()); } } diff --git a/src/glsl/ir.h b/src/glsl/ir.h index 43a2bf0ae1c..9c9f22d018b 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -1731,6 +1731,8 @@ public: virtual ir_visitor_status accept(ir_hierarchical_visitor *); + virtual ir_variable *variable_referenced() const; + ir_expression_operation operation; ir_rvalue *operands[4]; }; diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp index 309b6b72b5b..67ed3605a8c 100644 --- a/src/glsl/ir_constant_expression.cpp +++ b/src/glsl/ir_constant_expression.cpp @@ -36,6 +36,7 @@ #include <math.h> #include "main/core.h" /* for MAX2, MIN2, CLAMP */ #include "util/rounding.h" /* for _mesa_roundeven */ +#include "util/half_float.h" #include "ir.h" #include "glsl_types.h" #include "program/hash_table.h" diff --git a/src/glsl/ir_set_program_inouts.cpp b/src/glsl/ir_set_program_inouts.cpp index b7a0f6e95ba..d7c29b00f88 100644 --- a/src/glsl/ir_set_program_inouts.cpp +++ b/src/glsl/ir_set_program_inouts.cpp @@ -242,6 +242,12 @@ ir_set_program_inouts_visitor::try_mark_partial_variable(ir_variable *var, type = type->fields.array; } + /* TODO: implement proper arrays of arrays support + * for now let the caller mark whole variable as used. + */ + if (type->is_array() && type->fields.array->is_array()) + return false; + /* The code below only handles: * * - Indexing into matrices diff --git a/src/glsl/ir_uniform.h b/src/glsl/ir_uniform.h index 50fe76b7ea2..1854279925b 100644 --- a/src/glsl/ir_uniform.h +++ b/src/glsl/ir_uniform.h @@ -162,6 +162,22 @@ struct gl_uniform_storage { /** @} */ /** + * This is a compiler-generated uniform that should not be advertised + * via the API. + */ + bool hidden; + + /** + * This is a built-in uniform that should not be modified through any gl API. + */ + bool builtin; + + /** + * This is a shader storage buffer variable, not an uniform. + */ + bool is_shader_storage; + + /** * Index within gl_shader_program::AtomicBuffers[] of the atomic * counter buffer this uniform is stored in, or -1 if this is not * an atomic counter. 
@@ -181,20 +197,16 @@ struct gl_uniform_storage { unsigned num_compatible_subroutines; /** - * This is a compiler-generated uniform that should not be advertised - * via the API. + * A single integer identifying the number of active array elements of + * the top-level shader storage block member (GL_TOP_LEVEL_ARRAY_SIZE). */ - bool hidden; + unsigned top_level_array_size; /** - * This is a built-in uniform that should not be modified through any gl API. + * A single integer identifying the stride between array elements of the + * top-level shader storage block member. (GL_TOP_LEVEL_ARRAY_STRIDE). */ - bool builtin; - - /** - * This is a shader storage buffer variable, not an uniform. - */ - bool is_shader_storage; + unsigned top_level_array_stride; }; #ifdef __cplusplus diff --git a/src/glsl/ir_variable_refcount.cpp b/src/glsl/ir_variable_refcount.cpp index e4d825c454b..790627bd1e3 100644 --- a/src/glsl/ir_variable_refcount.cpp +++ b/src/glsl/ir_variable_refcount.cpp @@ -46,6 +46,15 @@ static void free_entry(struct hash_entry *entry) { ir_variable_refcount_entry *ivre = (ir_variable_refcount_entry *) entry->data; + + /* Free assignment list */ + exec_node *n; + while ((n = ivre->assign_list.pop_head()) != NULL) { + struct assignment_entry *assignment_entry = + exec_node_data(struct assignment_entry, n, link); + free(assignment_entry); + } + delete ivre; } @@ -59,7 +68,6 @@ ir_variable_refcount_visitor::~ir_variable_refcount_visitor() ir_variable_refcount_entry::ir_variable_refcount_entry(ir_variable *var) { this->var = var; - assign = NULL; assigned_count = 0; declaration = false; referenced_count = 0; @@ -125,8 +133,20 @@ ir_variable_refcount_visitor::visit_leave(ir_assignment *ir) entry = this->get_variable_entry(ir->lhs->variable_referenced()); if (entry) { entry->assigned_count++; - if (entry->assign == NULL) - entry->assign = ir; + + /* Build a list for dead code optimisation. Don't add assignment if it + * was declared out of scope (outside the instruction stream). Also don't + * bother adding any more to the list if there are more references than + * assignments as this means the variable is used and won't be optimised + * out. + */ + assert(entry->referenced_count >= entry->assigned_count); + if (entry->referenced_count == entry->assigned_count) { + struct assignment_entry *assignment_entry = + (struct assignment_entry *)calloc(1, sizeof(*assignment_entry)); + assignment_entry->assign = ir; + entry->assign_list.push_head(&assignment_entry->link); + } } return visit_continue; diff --git a/src/glsl/ir_variable_refcount.h b/src/glsl/ir_variable_refcount.h index c15e8110d04..5c74c314781 100644 --- a/src/glsl/ir_variable_refcount.h +++ b/src/glsl/ir_variable_refcount.h @@ -33,13 +33,24 @@ #include "ir_visitor.h" #include "glsl_types.h" +struct assignment_entry { + exec_node link; + ir_assignment *assign; +}; + class ir_variable_refcount_entry { public: ir_variable_refcount_entry(ir_variable *var); ir_variable *var; /* The key: the variable's pointer. */ - ir_assignment *assign; /* An assignment to the variable, if any */ + + /** + * List of assignments to the variable, if any. + * This is intended to be used for dead code optimisation and may + * not be a complete list. + */ + exec_list assign_list; /** Number of times the variable is referenced, including assignments. 
*/ unsigned referenced_count; diff --git a/src/glsl/link_atomics.cpp b/src/glsl/link_atomics.cpp index 100d03c4e8f..70ef0e1c891 100644 --- a/src/glsl/link_atomics.cpp +++ b/src/glsl/link_atomics.cpp @@ -33,7 +33,7 @@ namespace { * Atomic counter as seen by the program. */ struct active_atomic_counter { - unsigned id; + unsigned uniform_loc; ir_variable *var; }; @@ -52,7 +52,7 @@ namespace { free(counters); } - void push_back(unsigned id, ir_variable *var) + void push_back(unsigned uniform_loc, ir_variable *var) { active_atomic_counter *new_counters; @@ -66,7 +66,7 @@ namespace { } counters = new_counters; - counters[num_counters].id = id; + counters[num_counters].uniform_loc = uniform_loc; counters[num_counters].var = var; num_counters++; } @@ -95,6 +95,50 @@ namespace { y->data.atomic.offset < x->data.atomic.offset + x->type->atomic_size())); } + void + process_atomic_variable(const glsl_type *t, struct gl_shader_program *prog, + unsigned *uniform_loc, ir_variable *var, + active_atomic_buffer *const buffers, + unsigned *num_buffers, int *offset, + const unsigned shader_stage) + { + /* FIXME: Arrays of arrays get counted separately. For example: + * x1[3][3][2] = 9 counters + * x2[3][2] = 3 counters + * x3[2] = 1 counter + * + * However this code marks all the counters as active even when they + * might not be used. + */ + if (t->is_array() && t->fields.array->is_array()) { + for (unsigned i = 0; i < t->length; i++) { + process_atomic_variable(t->fields.array, prog, uniform_loc, + var, buffers, num_buffers, offset, + shader_stage); + } + } else { + active_atomic_buffer *buf = &buffers[var->data.binding]; + gl_uniform_storage *const storage = + &prog->UniformStorage[*uniform_loc]; + + /* If this is the first time the buffer is used, increment + * the counter of buffers used. + */ + if (buf->size == 0) + (*num_buffers)++; + + buf->push_back(*uniform_loc, var); + + buf->stage_references[shader_stage]++; + buf->size = MAX2(buf->size, *offset + t->atomic_size()); + + storage->offset = *offset; + *offset += t->atomic_size(); + + (*uniform_loc)++; + } + } + active_atomic_buffer * find_active_atomic_counters(struct gl_context *ctx, struct gl_shader_program *prog, @@ -114,23 +158,10 @@ namespace { ir_variable *var = node->as_variable(); if (var && var->type->contains_atomic()) { - unsigned id = 0; - bool found = prog->UniformHash->get(id, var->name); - assert(found); - (void) found; - active_atomic_buffer *buf = &buffers[var->data.binding]; - - /* If this is the first time the buffer is used, increment - * the counter of buffers used. - */ - if (buf->size == 0) - (*num_buffers)++; - - buf->push_back(id, var); - - buf->stage_references[i]++; - buf->size = MAX2(buf->size, var->data.atomic.offset + - var->type->atomic_size()); + int offset = var->data.atomic.offset; + unsigned uniform_loc = var->data.location; + process_atomic_variable(var->type, prog, &uniform_loc, + var, buffers, num_buffers, &offset, i); } } } @@ -197,10 +228,10 @@ link_assign_atomic_counter_resources(struct gl_context *ctx, /* Assign counter-specific fields. 
*/ for (unsigned j = 0; j < ab.num_counters; j++) { ir_variable *const var = ab.counters[j].var; - const unsigned id = ab.counters[j].id; - gl_uniform_storage *const storage = &prog->UniformStorage[id]; + gl_uniform_storage *const storage = + &prog->UniformStorage[ab.counters[j].uniform_loc]; - mab.Uniforms[j] = id; + mab.Uniforms[j] = ab.counters[j].uniform_loc; if (!var->data.explicit_binding) var->data.binding = i; diff --git a/src/glsl/link_uniform_block_active_visitor.cpp b/src/glsl/link_uniform_block_active_visitor.cpp index bcf17fef758..422739af063 100644 --- a/src/glsl/link_uniform_block_active_visitor.cpp +++ b/src/glsl/link_uniform_block_active_visitor.cpp @@ -71,6 +71,88 @@ process_block(void *mem_ctx, struct hash_table *ht, ir_variable *var) return NULL; } +/* For arrays of arrays this function will give us a middle ground between + * detecting inactive uniform blocks and structuring them in a way that makes + * it easy to calculate the offset for indirect indexing. + * + * For example given the shader: + * + * uniform ArraysOfArraysBlock + * { + * vec4 a; + * } i[3][4][5]; + * + * void main() + * { + * vec4 b = i[0][1][1].a; + * gl_Position = i[2][2][3].a + b; + * } + * + * There are only 2 active blocks above but for the sake of indirect indexing + * and not over complicating the code we will end up with a count of 8. + * Here each dimension has 2 different indices counted so we end up with 2*2*2 + */ +struct uniform_block_array_elements ** +process_arrays(void *mem_ctx, ir_dereference_array *ir, + struct link_uniform_block_active *block) +{ + if (ir) { + struct uniform_block_array_elements **ub_array_ptr = + process_arrays(mem_ctx, ir->array->as_dereference_array(), block); + if (*ub_array_ptr == NULL) { + *ub_array_ptr = rzalloc(mem_ctx, struct uniform_block_array_elements); + (*ub_array_ptr)->ir = ir; + } + + struct uniform_block_array_elements *ub_array = *ub_array_ptr; + ir_constant *c = ir->array_index->as_constant(); + if (c) { + /* Index is a constant, so mark just that element used, + * if not already. + */ + const unsigned idx = c->get_uint_component(0); + + unsigned i; + for (i = 0; i < ub_array->num_array_elements; i++) { + if (ub_array->array_elements[i] == idx) + break; + } + + assert(i <= ub_array->num_array_elements); + + if (i == ub_array->num_array_elements) { + ub_array->array_elements = reralloc(mem_ctx, + ub_array->array_elements, + unsigned, + ub_array->num_array_elements + 1); + + ub_array->array_elements[ub_array->num_array_elements] = idx; + + ub_array->num_array_elements++; + } + } else { + /* The array index is not a constant, + * so mark the entire array used. 
+ */ + assert(ir->array->type->is_array()); + if (ub_array->num_array_elements < ir->array->type->length) { + ub_array->num_array_elements = ir->array->type->length; + ub_array->array_elements = reralloc(mem_ctx, + ub_array->array_elements, + unsigned, + ub_array->num_array_elements); + + for (unsigned i = 0; i < ub_array->num_array_elements; i++) { + ub_array->array_elements[i] = i; + } + } + } + return &ub_array->array; + } else { + return &block->array; + } +} + ir_visitor_status link_uniform_block_active_visitor::visit(ir_variable *var) { @@ -101,24 +183,30 @@ link_uniform_block_active_visitor::visit(ir_variable *var) return visit_stop; } - assert(b->num_array_elements == 0); - assert(b->array_elements == NULL); + assert(b->array == NULL); assert(b->type != NULL); assert(!b->type->is_array() || b->has_instance_name); /* For uniform block arrays declared with a shared or std140 layout * qualifier, mark all its instances as used. */ - if (b->type->is_array() && b->type->length > 0) { - b->num_array_elements = b->type->length; - b->array_elements = reralloc(this->mem_ctx, - b->array_elements, - unsigned, - b->num_array_elements); - - for (unsigned i = 0; i < b->num_array_elements; i++) { - b->array_elements[i] = i; + const glsl_type *type = b->type; + struct uniform_block_array_elements **ub_array = &b->array; + while (type->is_array()) { + assert(b->type->length > 0); + + *ub_array = rzalloc(this->mem_ctx, struct uniform_block_array_elements); + (*ub_array)->num_array_elements = type->length; + (*ub_array)->array_elements = reralloc(this->mem_ctx, + (*ub_array)->array_elements, + unsigned, + (*ub_array)->num_array_elements); + + for (unsigned i = 0; i < (*ub_array)->num_array_elements; i++) { + (*ub_array)->array_elements[i] = i; } + ub_array = &(*ub_array)->array; + type = type->fields.array; } return visit_continue; @@ -127,7 +215,13 @@ link_uniform_block_active_visitor::visit(ir_variable *var) ir_visitor_status link_uniform_block_active_visitor::visit_enter(ir_dereference_array *ir) { - ir_dereference_variable *const d = ir->array->as_dereference_variable(); + /* cycle through arrays of arrays */ + ir_dereference_array *base_ir = ir; + while (base_ir->array->ir_type == ir_type_dereference_array) + base_ir = base_ir->array->as_dereference_array(); + + ir_dereference_variable *const d = + base_ir->array->as_dereference_variable(); ir_variable *const var = (d == NULL) ? NULL : d->var; /* If the r-value being dereferenced is not a variable (e.g., a field of a @@ -158,55 +252,16 @@ link_uniform_block_active_visitor::visit_enter(ir_dereference_array *ir) /* Block arrays must be declared with an instance name. */ assert(b->has_instance_name); - assert((b->num_array_elements == 0) == (b->array_elements == NULL)); assert(b->type != NULL); /* If the block array was declared with a shared or * std140 layout qualifier, all its instances have been already marked * as used in link_uniform_block_active_visitor::visit(ir_variable *). 
*/ - if (var->get_interface_type()->interface_packing != - GLSL_INTERFACE_PACKING_PACKED) - return visit_continue_with_parent; - - ir_constant *c = ir->array_index->as_constant(); - - if (c) { - /* Index is a constant, so mark just that element used, if not already */ - const unsigned idx = c->get_uint_component(0); - - unsigned i; - for (i = 0; i < b->num_array_elements; i++) { - if (b->array_elements[i] == idx) - break; - } - - assert(i <= b->num_array_elements); - - if (i == b->num_array_elements) { - b->array_elements = reralloc(this->mem_ctx, - b->array_elements, - unsigned, - b->num_array_elements + 1); - - b->array_elements[b->num_array_elements] = idx; - - b->num_array_elements++; - } - } else { - /* The array index is not a constant, so mark the entire array used. */ - assert(b->type->is_array()); - if (b->num_array_elements < b->type->length) { - b->num_array_elements = b->type->length; - b->array_elements = reralloc(this->mem_ctx, - b->array_elements, - unsigned, - b->num_array_elements); - - for (unsigned i = 0; i < b->num_array_elements; i++) { - b->array_elements[i] = i; - } - } + if (var->get_interface_type()->interface_packing == + GLSL_INTERFACE_PACKING_PACKED) { + b->var = var; + process_arrays(this->mem_ctx, ir, b); } return visit_continue_with_parent; @@ -234,8 +289,7 @@ link_uniform_block_active_visitor::visit(ir_dereference_variable *ir) return visit_stop; } - assert(b->num_array_elements == 0); - assert(b->array_elements == NULL); + assert(b->array == NULL); assert(b->type != NULL); return visit_continue; diff --git a/src/glsl/link_uniform_block_active_visitor.h b/src/glsl/link_uniform_block_active_visitor.h index b663a884db4..afb52c14a37 100644 --- a/src/glsl/link_uniform_block_active_visitor.h +++ b/src/glsl/link_uniform_block_active_visitor.h @@ -28,11 +28,20 @@ #include "ir.h" #include "util/hash_table.h" +struct uniform_block_array_elements { + unsigned *array_elements; + unsigned num_array_elements; + + ir_dereference_array *ir; + + struct uniform_block_array_elements *array; +}; + struct link_uniform_block_active { const glsl_type *type; + ir_variable *var; - unsigned *array_elements; - unsigned num_array_elements; + struct uniform_block_array_elements *array; unsigned binding; diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp index 7ceffee799e..5285d8d01e4 100644 --- a/src/glsl/link_uniform_blocks.cpp +++ b/src/glsl/link_uniform_blocks.cpp @@ -116,7 +116,7 @@ private: char *open_bracket = strchr(v->IndexName, '['); assert(open_bracket != NULL); - char *close_bracket = strchr(open_bracket, ']'); + char *close_bracket = strchr(open_bracket, '.') - 1; assert(close_bracket != NULL); /* Length of the tail without the ']' but with the NUL. 
@@ -185,6 +185,91 @@ struct block { bool has_instance_name; }; +static void +process_block_array(struct uniform_block_array_elements *ub_array, char **name, + size_t name_length, gl_uniform_block *blocks, + ubo_visitor *parcel, gl_uniform_buffer_variable *variables, + const struct link_uniform_block_active *const b, + unsigned *block_index, unsigned *binding_offset, + struct gl_context *ctx, struct gl_shader_program *prog) +{ + if (ub_array) { + for (unsigned j = 0; j < ub_array->num_array_elements; j++) { + size_t new_length = name_length; + + /* Append the subscript to the current variable name */ + ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", + ub_array->array_elements[j]); + + process_block_array(ub_array->array, name, new_length, blocks, + parcel, variables, b, block_index, + binding_offset, ctx, prog); + } + } else { + unsigned i = *block_index; + const glsl_type *type = b->type->without_array(); + + blocks[i].Name = ralloc_strdup(blocks, *name); + blocks[i].Uniforms = &variables[(*parcel).index]; + + /* The GL_ARB_shading_language_420pack spec says: + * + * "If the binding identifier is used with a uniform block + * instanced as an array then the first element of the array + * takes the specified block binding and each subsequent + * element takes the next consecutive uniform block binding + * point." + */ + blocks[i].Binding = (b->has_binding) ? b->binding + *binding_offset : 0; + + blocks[i].UniformBufferSize = 0; + blocks[i]._Packing = gl_uniform_block_packing(type->interface_packing); + + parcel->process(type, blocks[i].Name); + + blocks[i].UniformBufferSize = parcel->buffer_size; + + /* Check SSBO size is lower than maximum supported size for SSBO */ + if (b->is_shader_storage && + parcel->buffer_size > ctx->Const.MaxShaderStorageBlockSize) { + linker_error(prog, "shader storage block `%s' has size %d, " + "which is larger than than the maximum allowed (%d)", + b->type->name, + parcel->buffer_size, + ctx->Const.MaxShaderStorageBlockSize); + } + blocks[i].NumUniforms = + (unsigned)(ptrdiff_t)(&variables[parcel->index] - blocks[i].Uniforms); + blocks[i].IsShaderStorage = b->is_shader_storage; + + *block_index = *block_index + 1; + *binding_offset = *binding_offset + 1; + } +} + +/* This function resizes the array types of the block so that later we can use + * this new size to correctly calculate the offest for indirect indexing. + */ +const glsl_type * +resize_block_array(const glsl_type *type, + struct uniform_block_array_elements *ub_array) +{ + if (type->is_array()) { + struct uniform_block_array_elements *child_array = + type->fields.array->is_array() ? ub_array->array : NULL; + const glsl_type *new_child_type = + resize_block_array(type->fields.array, child_array); + + const glsl_type *new_type = + glsl_type::get_array_instance(new_child_type, + ub_array->num_array_elements); + ub_array->ir->array->type = new_type; + return new_type; + } else { + return type; + } +} + unsigned link_uniform_blocks(void *mem_ctx, struct gl_context *ctx, @@ -223,21 +308,25 @@ link_uniform_blocks(void *mem_ctx, struct hash_entry *entry; hash_table_foreach (block_hash, entry) { - const struct link_uniform_block_active *const b = - (const struct link_uniform_block_active *) entry->data; + struct link_uniform_block_active *const b = + (struct link_uniform_block_active *) entry->data; - const glsl_type *const block_type = - b->type->is_array() ? 
b->type->fields.array : b->type; + assert((b->array != NULL) == b->type->is_array()); - assert((b->num_array_elements > 0) == b->type->is_array()); + if (b->array != NULL && + (b->type->without_array()->interface_packing == + GLSL_INTERFACE_PACKING_PACKED)) { + b->type = resize_block_array(b->type, b->array); + b->var->type = b->type; + } block_size.num_active_uniforms = 0; - block_size.process(block_type, ""); + block_size.process(b->type->without_array(), ""); - if (b->num_array_elements > 0) { - num_blocks += b->num_array_elements; - num_variables += b->num_array_elements - * block_size.num_active_uniforms; + if (b->array != NULL) { + unsigned aoa_size = b->type->arrays_of_arrays_size(); + num_blocks += aoa_size; + num_variables += aoa_size * block_size.num_active_uniforms; } else { num_blocks++; num_variables += block_size.num_active_uniforms; @@ -281,50 +370,15 @@ link_uniform_blocks(void *mem_ctx, (const struct link_uniform_block_active *) entry->data; const glsl_type *block_type = b->type; - if (b->num_array_elements > 0) { - const char *const name = block_type->fields.array->name; + if (b->array != NULL) { + unsigned binding_offset = 0; + char *name = ralloc_strdup(NULL, block_type->without_array()->name); + size_t name_length = strlen(name); assert(b->has_instance_name); - for (unsigned j = 0; j < b->num_array_elements; j++) { - blocks[i].Name = ralloc_asprintf(blocks, "%s[%u]", name, - b->array_elements[j]); - blocks[i].Uniforms = &variables[parcel.index]; - - /* The GL_ARB_shading_language_420pack spec says: - * - * "If the binding identifier is used with a uniform block - * instanced as an array then the first element of the array - * takes the specified block binding and each subsequent - * element takes the next consecutive uniform block binding - * point." - */ - blocks[i].Binding = (b->has_binding) ? 
b->binding + j : 0; - - blocks[i].UniformBufferSize = 0; - blocks[i]._Packing = - gl_uniform_block_packing(block_type->interface_packing); - - parcel.process(block_type->fields.array, - blocks[i].Name); - - blocks[i].UniformBufferSize = parcel.buffer_size; - - /* Check SSBO size is lower than maximum supported size for SSBO */ - if (b->is_shader_storage && - parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) { - linker_error(prog, "shader storage block `%s' has size %d, " - "which is larger than than the maximum allowed (%d)", - block_type->name, - parcel.buffer_size, - ctx->Const.MaxShaderStorageBlockSize); - } - blocks[i].NumUniforms = - (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms); - - blocks[i].IsShaderStorage = b->is_shader_storage; - - i++; - } + process_block_array(b->array, &name, name_length, blocks, &parcel, + variables, b, &i, &binding_offset, ctx, prog); + ralloc_free(name); } else { blocks[i].Name = ralloc_strdup(blocks, block_type->name); blocks[i].Uniforms = &variables[parcel.index]; diff --git a/src/glsl/link_uniform_initializers.cpp b/src/glsl/link_uniform_initializers.cpp index e9e108a2765..35b9f9c6017 100644 --- a/src/glsl/link_uniform_initializers.cpp +++ b/src/glsl/link_uniform_initializers.cpp @@ -49,7 +49,7 @@ get_uniform_block_index(const gl_shader_program *shProg, const char *uniformBlockName) { for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) { - if (!strcmp(shProg->UniformBlocks[i].Name, uniformBlockName)) + if (!strcmp(shProg->BufferInterfaceBlocks[i].Name, uniformBlockName)) return i; } @@ -107,51 +107,64 @@ copy_constant_to_storage(union gl_constant_value *storage, * they have no storage and should be handled elsewhere. */ void -set_opaque_binding(gl_shader_program *prog, const char *name, int binding) +set_opaque_binding(void *mem_ctx, gl_shader_program *prog, + const glsl_type *type, const char *name, int *binding) { - struct gl_uniform_storage *const storage = - get_storage(prog->UniformStorage, prog->NumUniformStorage, name); - if (storage == NULL) { - assert(storage != NULL); - return; - } + if (type->is_array() && type->fields.array->is_array()) { + const glsl_type *const element_type = type->fields.array; - const unsigned elements = MAX2(storage->array_elements, 1); + for (unsigned int i = 0; i < type->length; i++) { + const char *element_name = ralloc_asprintf(mem_ctx, "%s[%d]", name, i); - /* Section 4.4.4 (Opaque-Uniform Layout Qualifiers) of the GLSL 4.20 spec - * says: - * - * "If the binding identifier is used with an array, the first element - * of the array takes the specified unit and each subsequent element - * takes the next consecutive unit." 
- */ - for (unsigned int i = 0; i < elements; i++) { - storage->storage[i].i = binding + i; - } + set_opaque_binding(mem_ctx, prog, element_type, + element_name, binding); + } + } else { + struct gl_uniform_storage *const storage = + get_storage(prog->UniformStorage, prog->NumUniformStorage, name); - for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { - gl_shader *shader = prog->_LinkedShaders[sh]; + if (storage == NULL) { + assert(storage != NULL); + return; + } - if (shader) { - if (storage->type->base_type == GLSL_TYPE_SAMPLER && - storage->opaque[sh].active) { - for (unsigned i = 0; i < elements; i++) { - const unsigned index = storage->opaque[sh].index + i; - shader->SamplerUnits[index] = storage->storage[i].i; - } + const unsigned elements = MAX2(storage->array_elements, 1); + + /* Section 4.4.4 (Opaque-Uniform Layout Qualifiers) of the GLSL 4.20 spec + * says: + * + * "If the binding identifier is used with an array, the first element + * of the array takes the specified unit and each subsequent element + * takes the next consecutive unit." + */ + for (unsigned int i = 0; i < elements; i++) { + storage->storage[i].i = (*binding)++; + } + + for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { + gl_shader *shader = prog->_LinkedShaders[sh]; - } else if (storage->type->base_type == GLSL_TYPE_IMAGE && + if (shader) { + if (storage->type->base_type == GLSL_TYPE_SAMPLER && + storage->opaque[sh].active) { + for (unsigned i = 0; i < elements; i++) { + const unsigned index = storage->opaque[sh].index + i; + shader->SamplerUnits[index] = storage->storage[i].i; + } + + } else if (storage->type->base_type == GLSL_TYPE_IMAGE && storage->opaque[sh].active) { - for (unsigned i = 0; i < elements; i++) { - const unsigned index = storage->opaque[sh].index + i; - shader->ImageUnits[index] = storage->storage[i].i; + for (unsigned i = 0; i < elements; i++) { + const unsigned index = storage->opaque[sh].index + i; + shader->ImageUnits[index] = storage->storage[i].i; + } } } } - } - storage->initialized = true; + storage->initialized = true; + } } void @@ -170,7 +183,7 @@ set_block_binding(gl_shader_program *prog, const char *block_name, int binding) if (stage_index != -1) { struct gl_shader *sh = prog->_LinkedShaders[i]; - sh->UniformBlocks[stage_index].Binding = binding; + sh->BufferInterfaceBlocks[stage_index].Binding = binding; } } } @@ -180,6 +193,7 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog, const char *name, const glsl_type *type, ir_constant *val, unsigned int boolean_true) { + const glsl_type *t_without_array = type->without_array(); if (type->is_record()) { ir_constant *field_constant; @@ -194,7 +208,8 @@ set_uniform_initializer(void *mem_ctx, gl_shader_program *prog, field_constant = (ir_constant *)field_constant->next; } return; - } else if (type->is_array() && type->fields.array->is_record()) { + } else if (t_without_array->is_record() || + (type->is_array() && type->fields.array->is_array())) { const glsl_type *const element_type = type->fields.array; for (unsigned int i = 0; i < type->length; i++) { @@ -284,7 +299,9 @@ link_set_uniform_initializers(struct gl_shader_program *prog, if (type->without_array()->is_sampler() || type->without_array()->is_image()) { - linker::set_opaque_binding(prog, var->name, var->data.binding); + int binding = var->data.binding; + linker::set_opaque_binding(mem_ctx, prog, var->type, + var->name, &binding); } else if (var->is_in_buffer_block()) { const glsl_type *const iface_type = var->get_interface_type(); @@ -327,9 +344,9 @@ 
link_set_uniform_initializers(struct gl_shader_program *prog, } else { assert(!"Explicit binding not on a sampler, UBO or atomic."); } - } else if (var->constant_value) { + } else if (var->constant_initializer) { linker::set_uniform_initializer(mem_ctx, prog, var->name, - var->type, var->constant_value, + var->type, var->constant_initializer, boolean_true); } } diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp index 0ccd9c8c865..fe00aa30d07 100644 --- a/src/glsl/link_uniforms.cpp +++ b/src/glsl/link_uniforms.cpp @@ -149,7 +149,8 @@ program_resource_visitor::process(ir_variable *var) recursion(var->type, &name, strlen(name), row_major, NULL, packing, false, record_array_count); ralloc_free(name); - } else if (t->without_array()->is_record()) { + } else if (t_without_array->is_record() || + (t->is_array() && t->fields.array->is_array())) { char *name = ralloc_strdup(NULL, var->name); recursion(var->type, &name, strlen(name), row_major, NULL, packing, false, record_array_count); @@ -160,6 +161,7 @@ program_resource_visitor::process(ir_variable *var) false, record_array_count); ralloc_free(name); } else { + this->set_record_array_count(record_array_count); this->visit_field(t, var->name, row_major, NULL, packing, false); } } @@ -231,7 +233,8 @@ program_resource_visitor::recursion(const glsl_type *t, char **name, this->leave_record(t, *name, row_major, packing); } } else if (t->without_array()->is_record() || - t->without_array()->is_interface()) { + t->without_array()->is_interface() || + (t->is_array() && t->fields.array->is_array())) { if (record_type == NULL && t->fields.array->is_record()) record_type = t->fields.array; @@ -387,6 +390,7 @@ private: { assert(!type->without_array()->is_record()); assert(!type->without_array()->is_interface()); + assert(!(type->is_array() && type->fields.array->is_array())); (void) row_major; @@ -502,9 +506,9 @@ public: for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) { if (strncmp(var->get_interface_type()->name, - prog->UniformBlocks[i].Name, + prog->BufferInterfaceBlocks[i].Name, l) == 0 - && prog->UniformBlocks[i].Name[l] == '[') { + && prog->BufferInterfaceBlocks[i].Name[l] == '[') { ubo_block_index = i; break; } @@ -512,7 +516,7 @@ public: } else { for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) { if (strcmp(var->get_interface_type()->name, - prog->UniformBlocks[i].Name) == 0) { + prog->BufferInterfaceBlocks[i].Name) == 0) { ubo_block_index = i; break; } @@ -530,7 +534,7 @@ public: ubo_byte_offset = 0; } else { const struct gl_uniform_block *const block = - &prog->UniformBlocks[ubo_block_index]; + &prog->BufferInterfaceBlocks[ubo_block_index]; assert(var->data.location != -1); @@ -712,6 +716,7 @@ private: { assert(!type->without_array()->is_record()); assert(!type->without_array()->is_interface()); + assert(!(type->is_array() && type->fields.array->is_array())); unsigned id; bool found = this->map->get(id, name); @@ -804,10 +809,11 @@ private: if (type->is_array()) { if (packing == GLSL_INTERFACE_PACKING_STD430) this->uniforms[id].array_stride = - type->fields.array->std430_array_stride(row_major); + type->without_array()->std430_array_stride(row_major); else this->uniforms[id].array_stride = - glsl_align(type->fields.array->std140_size(row_major), 16); + glsl_align(type->without_array()->std140_size(row_major), + 16); } else { this->uniforms[id].array_stride = 0; } @@ -966,15 +972,16 @@ link_update_uniform_buffer_variables(struct gl_shader *shader) if (var->type->is_record()) { sentinel = '.'; - } else if 
(var->type->without_array()->is_record()) { + } else if (var->type->is_array() && (var->type->fields.array->is_array() + || var->type->without_array()->is_record())) { sentinel = '['; } const unsigned l = strlen(var->name); - for (unsigned i = 0; i < shader->NumUniformBlocks; i++) { - for (unsigned j = 0; j < shader->UniformBlocks[i].NumUniforms; j++) { + for (unsigned i = 0; i < shader->NumBufferInterfaceBlocks; i++) { + for (unsigned j = 0; j < shader->BufferInterfaceBlocks[i].NumUniforms; j++) { if (sentinel) { - const char *begin = shader->UniformBlocks[i].Uniforms[j].Name; + const char *begin = shader->BufferInterfaceBlocks[i].Uniforms[j].Name; const char *end = strchr(begin, sentinel); if (end == NULL) @@ -989,7 +996,7 @@ link_update_uniform_buffer_variables(struct gl_shader *shader) break; } } else if (!strcmp(var->name, - shader->UniformBlocks[i].Uniforms[j].Name)) { + shader->BufferInterfaceBlocks[i].Uniforms[j].Name)) { found = true; var->data.location = j; break; @@ -1115,10 +1122,10 @@ link_assign_uniform_locations(struct gl_shader_program *prog, sh->num_uniform_components = uniform_size.num_shader_uniform_components; sh->num_combined_uniform_components = sh->num_uniform_components; - for (unsigned i = 0; i < sh->NumUniformBlocks; i++) { - if (!sh->UniformBlocks[i].IsShaderStorage) { + for (unsigned i = 0; i < sh->NumBufferInterfaceBlocks; i++) { + if (!sh->BufferInterfaceBlocks[i].IsShaderStorage) { sh->num_combined_uniform_components += - sh->UniformBlocks[i].UniformBufferSize / 4; + sh->BufferInterfaceBlocks[i].UniformBufferSize / 4; } } } diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index a97b4ef0a32..25ca928aa43 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -65,6 +65,7 @@ */ #include <ctype.h> +#include "util/strndup.h" #include "main/core.h" #include "glsl_symbol_table.h" #include "glsl_parser_extras.h" @@ -1161,7 +1162,7 @@ cross_validate_uniforms(struct gl_shader_program *prog) } /** - * Accumulates the array of prog->UniformBlocks and checks that all + * Accumulates the array of prog->BufferInterfaceBlocks and checks that all * definitions of blocks agree on their contents.
*/ static bool @@ -1170,7 +1171,7 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog) unsigned max_num_uniform_blocks = 0; for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { if (prog->_LinkedShaders[i]) - max_num_uniform_blocks += prog->_LinkedShaders[i]->NumUniformBlocks; + max_num_uniform_blocks += prog->_LinkedShaders[i]->NumBufferInterfaceBlocks; } for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { @@ -1184,15 +1185,15 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog) if (sh == NULL) continue; - for (unsigned int j = 0; j < sh->NumUniformBlocks; j++) { + for (unsigned int j = 0; j < sh->NumBufferInterfaceBlocks; j++) { int index = link_cross_validate_uniform_block(prog, - &prog->UniformBlocks, + &prog->BufferInterfaceBlocks, &prog->NumBufferInterfaceBlocks, - &sh->UniformBlocks[j]); + &sh->BufferInterfaceBlocks[j]); if (index == -1) { linker_error(prog, "uniform block `%s' has mismatching definitions\n", - sh->UniformBlocks[j].Name); + sh->BufferInterfaceBlocks[j].Name); return false; } @@ -1386,8 +1387,10 @@ public: virtual ir_visitor_status visit(ir_variable *var) { + const glsl_type *type_without_array; fixup_type(&var->type, var->data.max_array_access, var->data.from_ssbo_unsized_array); + type_without_array = var->type->without_array(); if (var->type->is_interface()) { if (interface_contains_unsized_arrays(var->type)) { const glsl_type *new_type = @@ -1397,11 +1400,10 @@ public: var->type = new_type; var->change_interface_type(new_type); } - } else if (var->type->is_array() && - var->type->fields.array->is_interface()) { - if (interface_contains_unsized_arrays(var->type->fields.array)) { + } else if (type_without_array->is_interface()) { + if (interface_contains_unsized_arrays(type_without_array)) { const glsl_type *new_type = - resize_interface_members(var->type->fields.array, + resize_interface_members(type_without_array, var->get_max_ifc_array_access(), var->is_in_shader_storage_block()); var->change_interface_type(new_type); @@ -2064,9 +2066,9 @@ link_intrastage_shaders(void *mem_ctx, linked->ir = new(linked) exec_list; clone_ir_list(mem_ctx, linked->ir, main->ir); - linked->UniformBlocks = uniform_blocks; - linked->NumUniformBlocks = num_uniform_blocks; - ralloc_steal(linked, linked->UniformBlocks); + linked->BufferInterfaceBlocks = uniform_blocks; + linked->NumBufferInterfaceBlocks = num_uniform_blocks; + ralloc_steal(linked, linked->BufferInterfaceBlocks); link_fs_input_layout_qualifiers(prog, linked, shader_list, num_shaders); link_tcs_out_layout_qualifiers(prog, linked, shader_list, num_shaders); @@ -2804,19 +2806,19 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog) for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) { /* Don't check SSBOs for Uniform Block Size */ - if (!prog->UniformBlocks[i].IsShaderStorage && - prog->UniformBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) { + if (!prog->BufferInterfaceBlocks[i].IsShaderStorage && + prog->BufferInterfaceBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) { linker_error(prog, "Uniform block %s too big (%d/%d)\n", - prog->UniformBlocks[i].Name, - prog->UniformBlocks[i].UniformBufferSize, + prog->BufferInterfaceBlocks[i].Name, + prog->BufferInterfaceBlocks[i].UniformBufferSize, ctx->Const.MaxUniformBlockSize); } - if (prog->UniformBlocks[i].IsShaderStorage && - prog->UniformBlocks[i].UniformBufferSize > ctx->Const.MaxShaderStorageBlockSize) { + if (prog->BufferInterfaceBlocks[i].IsShaderStorage && + 
prog->BufferInterfaceBlocks[i].UniformBufferSize > ctx->Const.MaxShaderStorageBlockSize) { linker_error(prog, "Shader storage block %s too big (%d/%d)\n", - prog->UniformBlocks[i].Name, - prog->UniformBlocks[i].UniformBufferSize, + prog->BufferInterfaceBlocks[i].Name, + prog->BufferInterfaceBlocks[i].UniformBufferSize, ctx->Const.MaxShaderStorageBlockSize); } @@ -2824,7 +2826,7 @@ check_resources(struct gl_context *ctx, struct gl_shader_program *prog) if (prog->UniformBlockStageIndex[j][i] != -1) { struct gl_shader *sh = prog->_LinkedShaders[j]; int stage_index = prog->UniformBlockStageIndex[j][i]; - if (sh && sh->UniformBlocks[stage_index].IsShaderStorage) { + if (sh && sh->BufferInterfaceBlocks[stage_index].IsShaderStorage) { shader_blocks[j]++; total_shader_storage_blocks++; } else { @@ -2941,7 +2943,7 @@ check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog) for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) { int stage_index = prog->UniformBlockStageIndex[i][j]; - if (stage_index != -1 && sh->UniformBlocks[stage_index].IsShaderStorage) + if (stage_index != -1 && sh->BufferInterfaceBlocks[stage_index].IsShaderStorage) total_shader_storage_blocks++; } @@ -3147,7 +3149,7 @@ should_add_buffer_variable(struct gl_shader_program *shProg, return true; for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) { - block_name = shProg->UniformBlocks[i].Name; + block_name = shProg->BufferInterfaceBlocks[i].Name; if (strncmp(block_name, name, strlen(block_name)) == 0) { found_interface = true; break; @@ -3389,6 +3391,242 @@ add_packed_varyings(struct gl_shader_program *shProg, int stage) return true; } +static char* +get_top_level_name(const char *name) +{ + const char *first_dot = strchr(name, '.'); + const char *first_square_bracket = strchr(name, '['); + int name_size = 0; + /* From ARB_program_interface_query spec: + * + * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer identifying the + * number of active array elements of the top-level shader storage block + * member containing the active variable is written to <params>. If the + * top-level block member is not declared as an array, the value one is + * written to <params>. If the top-level block member is an array with no + * declared size, the value zero is written to <params>. + */ + + /* The buffer variable is at top level. */ + if (!first_square_bracket && !first_dot) + name_size = strlen(name); + else if ((!first_square_bracket || + (first_dot && first_dot < first_square_bracket))) + name_size = first_dot - name; + else + name_size = first_square_bracket - name; + + return strndup(name, name_size); +} + +static char* +get_var_name(const char *name) +{ + const char *first_dot = strchr(name, '.'); + + if (!first_dot) + return strdup(name); + + return strndup(first_dot+1, strlen(first_dot) - 1); +} + +static bool +is_top_level_shader_storage_block_member(const char* name, + const char* interface_name, + const char* field_name) +{ + bool result = false; + + /* If the given variable is already a top-level shader storage + * block member, then return array_size = 1. + * There are two possibilities: the shader storage block may be + * instanced or not instanced. + * + * For the first, we create the name as it would appear at top level and + * compare it with the real name. If they are the same, then + * the variable is already at top-level. + * + * Full instanced name is: interface name + '.'
+ var name + + * NULL character + */ + int name_length = strlen(interface_name) + 1 + strlen(field_name) + 1; + char *full_instanced_name = (char *) calloc(name_length, sizeof(char)); + if (!full_instanced_name) { + fprintf(stderr, "%s: Cannot allocate space for name\n", __func__); + return false; + } + + snprintf(full_instanced_name, name_length, "%s.%s", + interface_name, field_name); + + /* Check if it's a top-level shader storage block member of an + * instanced interface block, or of an unnamed interface block. + */ + if (strcmp(name, full_instanced_name) == 0 || + strcmp(name, field_name) == 0) + result = true; + + free(full_instanced_name); + return result; +} + +static void +calculate_array_size(struct gl_shader_program *shProg, + struct gl_uniform_storage *uni) +{ + int block_index = uni->block_index; + int array_size = -1; + char *var_name = get_top_level_name(uni->name); + char *interface_name = + get_top_level_name(shProg->BufferInterfaceBlocks[block_index].Name); + + if (strcmp(var_name, interface_name) == 0) { + /* Deal with instanced array of SSBOs */ + char *temp_name = get_var_name(uni->name); + free(var_name); + var_name = get_top_level_name(temp_name); + free(temp_name); + } + + for (unsigned i = 0; i < shProg->NumShaders; i++) { + if (shProg->Shaders[i] == NULL) + continue; + + const gl_shader *stage = shProg->Shaders[i]; + foreach_in_list(ir_instruction, node, stage->ir) { + ir_variable *var = node->as_variable(); + if (!var || !var->get_interface_type() || + var->data.mode != ir_var_shader_storage) + continue; + + const glsl_type *interface = var->get_interface_type(); + + if (strcmp(interface_name, interface->name) != 0) + continue; + + for (unsigned i = 0; i < interface->length; i++) { + const glsl_struct_field *field = &interface->fields.structure[i]; + if (strcmp(field->name, var_name) != 0) + continue; + /* From GL_ARB_program_interface_query spec: + * + * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer + * identifying the number of active array elements of the top-level + * shader storage block member containing the active variable is + * written to <params>. If the top-level block member is not + * declared as an array, the value one is written to <params>. If + * the top-level block member is an array with no declared size, + * the value zero is written to <params>.
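On the application side these are the values read back through the GL 4.3 program interface query API; a hedged usage sketch, assuming a loader such as epoxy that exposes the 4.3 entry points and a linked program object `prog`:

#include <epoxy/gl.h>

/* Returns TOP_LEVEL_ARRAY_SIZE for a buffer variable, or -1 if the
 * name is not an active buffer variable in prog.
 */
static GLint top_level_array_size(GLuint prog, const char *var)
{
   GLuint idx = glGetProgramResourceIndex(prog, GL_BUFFER_VARIABLE, var);
   if (idx == GL_INVALID_INDEX)
      return -1;

   const GLenum prop = GL_TOP_LEVEL_ARRAY_SIZE;
   GLint value = -1;
   glGetProgramResourceiv(prog, GL_BUFFER_VARIABLE, idx,
                          1, &prop, 1, NULL, &value);
   return value; /* 1: not an array, 0: unsized array, else the size */
}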
+ */ + if (is_top_level_shader_storage_block_member(uni->name, + interface_name, + var_name)) + array_size = 1; + else if (field->type->is_unsized_array()) + array_size = 0; + else if (field->type->is_array()) + array_size = field->type->length; + else + array_size = 1; + + goto found_top_level_array_size; + } + } + } +found_top_level_array_size: + free(interface_name); + free(var_name); + uni->top_level_array_size = array_size; +} + +static void +calculate_array_stride(struct gl_shader_program *shProg, + struct gl_uniform_storage *uni) +{ + int block_index = uni->block_index; + int array_stride = -1; + char *var_name = get_top_level_name(uni->name); + char *interface_name = + get_top_level_name(shProg->BufferInterfaceBlocks[block_index].Name); + + if (strcmp(var_name, interface_name) == 0) { + /* Deal with instanced array of SSBOs */ + char *temp_name = get_var_name(uni->name); + free(var_name); + var_name = get_top_level_name(temp_name); + free(temp_name); + } + + for (unsigned i = 0; i < shProg->NumShaders; i++) { + if (shProg->Shaders[i] == NULL) + continue; + + const gl_shader *stage = shProg->Shaders[i]; + foreach_in_list(ir_instruction, node, stage->ir) { + ir_variable *var = node->as_variable(); + if (!var || !var->get_interface_type() || + var->data.mode != ir_var_shader_storage) + continue; + + const glsl_type *interface = var->get_interface_type(); + + if (strcmp(interface_name, interface->name) != 0) { + continue; + } + + for (unsigned i = 0; i < interface->length; i++) { + const glsl_struct_field *field = &interface->fields.structure[i]; + if (strcmp(field->name, var_name) != 0) + continue; + /* From GL_ARB_program_interface_query: + * + * "For the property TOP_LEVEL_ARRAY_STRIDE, a single integer + * identifying the stride between array elements of the top-level + * shader storage block member containing the active variable is + * written to <params>. For top-level block members declared as + * arrays, the value written is the difference, in basic machine + * units, between the offsets of the active variable for + * consecutive elements in the top-level array. For top-level + * block members not declared as an array, zero is written to + * <params>." + */ + if (field->type->is_array()) { + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(field->matrix_layout); + bool row_major = matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR; + const glsl_type *array_type = field->type->fields.array; + + if (is_top_level_shader_storage_block_member(uni->name, + interface_name, + var_name)) { + array_stride = 0; + goto found_top_level_array_stride; + } + if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) { + if (array_type->is_record() || array_type->is_array()) { + array_stride = array_type->std140_size(row_major); + array_stride = glsl_align(array_stride, 16); + } else { + unsigned element_base_align = 0; + element_base_align = array_type->std140_base_alignment(row_major); + array_stride = MAX2(element_base_align, 16); + } + } else { + array_stride = array_type->std430_array_stride(row_major); + } + } else { + array_stride = 0; + } + goto found_top_level_array_stride; + } + } + } +found_top_level_array_stride: + free(interface_name); + free(var_name); + uni->top_level_array_stride = array_stride; +} + /** * Builds up a list of program resources that point to existing * resource data. 
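The std140 branch of calculate_array_stride() above implements rule 4 (array stride rounded up to the base alignment of a vec4), while std430 keeps the element's natural stride. A compact restatement for arrays of scalars and vectors only, since structs follow rule 9 instead; align_to() is a local helper for this sketch, not a Mesa function:

static unsigned align_to(unsigned v, unsigned a)
{
   return (v + a - 1) / a * a;
}

static unsigned array_stride(unsigned elem_size, unsigned elem_base_align,
                             int std430)
{
   if (std430)
      return align_to(elem_size, elem_base_align);  /* no vec4 round-up */

   unsigned a = elem_base_align > 16 ? elem_base_align : 16;
   return align_to(elem_size, a);
}

/* float a[4]: std140 stride 16 (array size 64), std430 stride 4 (size 16).
 * vec3  b[4]: base alignment 16 in both layouts, so stride 16 either way.
 */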
@@ -3473,6 +3711,11 @@ build_program_resource_list(struct gl_shader_program *shProg) shProg->UniformStorage[i].name)) continue; + if (is_shader_storage) { + calculate_array_size(shProg, &shProg->UniformStorage[i]); + calculate_array_stride(shProg, &shProg->UniformStorage[i]); + } + if (!add_program_resource(shProg, type, &shProg->UniformStorage[i], stageref)) return; @@ -3480,10 +3723,10 @@ build_program_resource_list(struct gl_shader_program *shProg) /* Add program uniform blocks and shader storage blocks. */ for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) { - bool is_shader_storage = shProg->UniformBlocks[i].IsShaderStorage; + bool is_shader_storage = shProg->BufferInterfaceBlocks[i].IsShaderStorage; GLenum type = is_shader_storage ? GL_SHADER_STORAGE_BLOCK : GL_UNIFORM_BLOCK; if (!add_program_resource(shProg, type, - &shProg->UniformBlocks[i], 0)) + &shProg->BufferInterfaceBlocks[i], 0)) return; } @@ -3599,6 +3842,42 @@ link_assign_subroutine_types(struct gl_shader_program *prog) } } +static void +split_ubos_and_ssbos(void *mem_ctx, + struct gl_uniform_block *blocks, + unsigned num_blocks, + struct gl_uniform_block ***ubos, + unsigned *num_ubos, + struct gl_uniform_block ***ssbos, + unsigned *num_ssbos) +{ + unsigned num_ubo_blocks = 0; + unsigned num_ssbo_blocks = 0; + + for (unsigned i = 0; i < num_blocks; i++) { + if (blocks[i].IsShaderStorage) + num_ssbo_blocks++; + else + num_ubo_blocks++; + } + + *ubos = ralloc_array(mem_ctx, gl_uniform_block *, num_ubo_blocks); + *num_ubos = 0; + + *ssbos = ralloc_array(mem_ctx, gl_uniform_block *, num_ssbo_blocks); + *num_ssbos = 0; + + for (unsigned i = 0; i < num_blocks; i++) { + if (blocks[i].IsShaderStorage) { + (*ssbos)[(*num_ssbos)++] = &blocks[i]; + } else { + (*ubos)[(*num_ubos)++] = &blocks[i]; + } + } + + assert(*num_ubos + *num_ssbos == num_blocks); +} + void link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) { @@ -4110,6 +4389,31 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) } } + /* Split BufferInterfaceBlocks into UniformBlocks and ShaderStorageBlocks + * for gl_shader_program and gl_shader, so that drivers that need separate + * index spaces for each set can have that. + */ + for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++) { + if (prog->_LinkedShaders[i] != NULL) { + gl_shader *sh = prog->_LinkedShaders[i]; + split_ubos_and_ssbos(sh, + sh->BufferInterfaceBlocks, + sh->NumBufferInterfaceBlocks, + &sh->UniformBlocks, + &sh->NumUniformBlocks, + &sh->ShaderStorageBlocks, + &sh->NumShaderStorageBlocks); + } + } + + split_ubos_and_ssbos(prog, + prog->BufferInterfaceBlocks, + prog->NumBufferInterfaceBlocks, + &prog->UniformBlocks, + &prog->NumUniformBlocks, + &prog->ShaderStorageBlocks, + &prog->NumShaderStorageBlocks); + /* FINISHME: Assign fragment shader output locations. 
*/ done: diff --git a/src/glsl/lower_named_interface_blocks.cpp b/src/glsl/lower_named_interface_blocks.cpp index 01bbdd0587e..276a2dedf47 100644 --- a/src/glsl/lower_named_interface_blocks.cpp +++ b/src/glsl/lower_named_interface_blocks.cpp @@ -65,6 +65,39 @@ #include "ir_rvalue_visitor.h" #include "program/hash_table.h" +static const glsl_type * +process_array_type(const glsl_type *type, unsigned idx) +{ + const glsl_type *element_type = type->fields.array; + if (element_type->is_array()) { + const glsl_type *new_array_type = process_array_type(element_type, idx); + return glsl_type::get_array_instance(new_array_type, type->length); + } else { + return glsl_type::get_array_instance( + element_type->fields.structure[idx].type, type->length); + } +} + +static ir_rvalue * +process_array_ir(void * const mem_ctx, + ir_dereference_array *deref_array_prev, + ir_rvalue *deref_var) +{ + ir_dereference_array *deref_array = + deref_array_prev->array->as_dereference_array(); + + if (deref_array == NULL) { + return new(mem_ctx) ir_dereference_array(deref_var, + deref_array_prev->array_index); + } else { + deref_array = (ir_dereference_array *) process_array_ir(mem_ctx, + deref_array, + deref_var); + return new(mem_ctx) ir_dereference_array(deref_array, + deref_array_prev->array_index); + } +} + namespace { class flatten_named_interface_blocks_declarations : public ir_rvalue_visitor @@ -112,15 +145,9 @@ flatten_named_interface_blocks_declarations::run(exec_list *instructions) var->data.mode == ir_var_shader_storage) continue; - const glsl_type * iface_t = var->type; - const glsl_type * array_t = NULL; + const glsl_type * iface_t = var->type->without_array(); exec_node *insert_pos = var; - if (iface_t->is_array()) { - array_t = iface_t; - iface_t = array_t->fields.array; - } - assert (iface_t->is_interface()); for (unsigned i = 0; i < iface_t->length; i++) { @@ -137,7 +164,7 @@ flatten_named_interface_blocks_declarations::run(exec_list *instructions) ir_variable *new_var; char *var_name = ralloc_strdup(mem_ctx, iface_t->fields.structure[i].name); - if (array_t == NULL) { + if (!var->type->is_array()) { new_var = new(mem_ctx) ir_variable(iface_t->fields.structure[i].type, var_name, @@ -145,9 +172,7 @@ flatten_named_interface_blocks_declarations::run(exec_list *instructions) new_var->data.from_named_ifc_block_nonarray = 1; } else { const glsl_type *new_array_type = - glsl_type::get_array_instance( - iface_t->fields.structure[i].type, - array_t->length); + process_array_type(var->type, i); new_var = new(mem_ctx) ir_variable(new_array_type, var_name, @@ -236,9 +261,8 @@ flatten_named_interface_blocks_declarations::handle_rvalue(ir_rvalue **rvalue) ir_dereference_array *deref_array = ir->record->as_dereference_array(); if (deref_array != NULL) { - *rvalue = - new(mem_ctx) ir_dereference_array(deref_var, - deref_array->array_index); + *rvalue = process_array_ir(mem_ctx, deref_array, + (ir_rvalue *)deref_var); } else { *rvalue = deref_var; } diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp index 247620e6148..e818c048461 100644 --- a/src/glsl/lower_ubo_reference.cpp +++ b/src/glsl/lower_ubo_reference.cpp @@ -203,55 +203,114 @@ static const char * interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d, ir_rvalue **nonconst_block_index) { - ir_rvalue *previous_index = NULL; *nonconst_block_index = NULL; + char *name_copy = NULL; + size_t base_length = 0; + + /* Loop back through the IR until we find the uniform block */ + ir_rvalue *ir = d; + while (ir != NULL) { + 
switch (ir->ir_type) { + case ir_type_dereference_variable: { + /* Exit loop */ + ir = NULL; + break; + } + + case ir_type_dereference_record: { + ir_dereference_record *r = (ir_dereference_record *) ir; + ir = r->record->as_dereference(); + + /* If we got here it means any previous array subscripts belong to + * block members and not the block itself so skip over them in the + * next pass. + */ + d = ir; + break; + } + + case ir_type_dereference_array: { + ir_dereference_array *a = (ir_dereference_array *) ir; + ir = a->array->as_dereference(); + break; + } + + case ir_type_swizzle: { + ir_swizzle *s = (ir_swizzle *) ir; + ir = s->val->as_dereference(); + break; + } + + default: + assert(!"Should not get here."); + break; + } + } while (d != NULL) { switch (d->ir_type) { case ir_type_dereference_variable: { ir_dereference_variable *v = (ir_dereference_variable *) d; - if (previous_index - && v->var->is_interface_instance() - && v->var->type->is_array()) { - - ir_constant *const_index = previous_index->as_constant(); - if (!const_index) { - *nonconst_block_index = previous_index; - return ralloc_asprintf(mem_ctx, "%s[0]", base_name); - } else { - return ralloc_asprintf(mem_ctx, - "%s[%d]", - base_name, - const_index->get_uint_component(0)); - } + if (name_copy != NULL && + v->var->is_interface_instance() && + v->var->type->is_array()) { + return name_copy; } else { + *nonconst_block_index = NULL; return base_name; } break; } - case ir_type_dereference_record: { - ir_dereference_record *r = (ir_dereference_record *) d; - - d = r->record->as_dereference(); - break; - } - case ir_type_dereference_array: { ir_dereference_array *a = (ir_dereference_array *) d; + size_t new_length; + + if (name_copy == NULL) { + name_copy = ralloc_strdup(mem_ctx, base_name); + base_length = strlen(name_copy); + } + + /* For arrays of arrays we start at the innermost array and work our + * way out so we need to insert the subscript at the base of the + * name string rather than just attaching it to the end. 
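In other words, when interface_field_name() visits ifc[1][2].foo it sees the subscripts innermost-first (2, then 1), yet both must land immediately after "ifc". A stand-alone illustration of that splice, using plain malloc/snprintf in place of ralloc_asprintf_rewrite_tail():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Returns a fresh string with "[idx]" inserted at position pos. */
static char *splice_subscript(const char *name, size_t pos, int idx)
{
   size_t len = strlen(name) + 16;
   char *out = malloc(len);
   snprintf(out, len, "%.*s[%d]%s", (int)pos, name, idx, name + pos);
   return out;
}

int main(void)
{
   /* Indices arrive innermost-out, but each insert happens at the
    * base of the name, so earlier subscripts get pushed right.
    */
   char *s1 = splice_subscript("ifc", 3, 2); /* "ifc[2]"    */
   char *s2 = splice_subscript(s1, 3, 1);    /* "ifc[1][2]" */
   puts(s2);
   free(s1);
   free(s2);
   return 0;
}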
+ */ + new_length = base_length; + ir_constant *const_index = a->array_index->as_constant(); + char *end = ralloc_strdup(NULL, &name_copy[new_length]); + if (!const_index) { + ir_rvalue *array_index = a->array_index; + if (array_index->type != glsl_type::uint_type) + array_index = i2u(array_index); + + if (a->array->type->is_array() && + a->array->type->fields.array->is_array()) { + ir_constant *base_size = new(mem_ctx) + ir_constant(a->array->type->fields.array->arrays_of_arrays_size()); + array_index = mul(array_index, base_size); + } + + if (*nonconst_block_index) { + *nonconst_block_index = add(*nonconst_block_index, array_index); + } else { + *nonconst_block_index = array_index; + } + + ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s", + end); + } else { + ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s", + const_index->get_uint_component(0), + end); + } + ralloc_free(end); d = a->array->as_dereference(); - previous_index = a->array_index; break; } - case ir_type_swizzle: { - ir_swizzle *s = (ir_swizzle *) d; - d = s->val->as_dereference(); - break; - } default: assert(!"Should not get here."); break; @@ -277,27 +336,31 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var, interface_field_name(mem_ctx, (char *) var->get_interface_type()->name, deref, &nonconst_block_index); - /* Locate the ubo block by interface name */ + /* Locate the block by interface name */ + this->is_shader_storage = var->is_in_shader_storage_block(); + unsigned num_blocks; + struct gl_uniform_block **blocks; + if (this->is_shader_storage) { + num_blocks = shader->NumShaderStorageBlocks; + blocks = shader->ShaderStorageBlocks; + } else { + num_blocks = shader->NumUniformBlocks; + blocks = shader->UniformBlocks; + } this->uniform_block = NULL; - for (unsigned i = 0; i < shader->NumUniformBlocks; i++) { - if (strcmp(field_name, shader->UniformBlocks[i].Name) == 0) { + for (unsigned i = 0; i < num_blocks; i++) { + if (strcmp(field_name, blocks[i]->Name) == 0) { ir_constant *index = new(mem_ctx) ir_constant(i); if (nonconst_block_index) { - if (nonconst_block_index->type != glsl_type::uint_type) - nonconst_block_index = i2u(nonconst_block_index); this->uniform_block = add(nonconst_block_index, index); } else { this->uniform_block = index; } - this->is_shader_storage = shader->UniformBlocks[i].IsShaderStorage; - - struct gl_uniform_block *block = &shader->UniformBlocks[i]; - this->ubo_var = var->is_interface_instance() - ? &block->Uniforms[0] : &block->Uniforms[var->data.location]; + ? &blocks[i]->Uniforms[0] : &blocks[i]->Uniforms[var->data.location]; break; } @@ -335,7 +398,7 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var, if (deref_array->array->type->is_double()) array_stride *= 2; *matrix_columns = deref_array->array->type->matrix_columns; - } else if (deref_array->type->is_interface()) { + } else if (deref_array->type->without_array()->is_interface()) { /* We're processing an array dereference of an interface instance * array. The thing being dereferenced *must* be a variable * dereference because interfaces cannot be embedded in other @@ -344,7 +407,6 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var, * interface instance array will have the same offsets relative to * the base of the block that backs them. 
*/ - assert(deref_array->array->as_dereference_variable()); deref = deref_array->array->as_dereference(); break; } else { @@ -744,7 +806,31 @@ lower_ubo_reference_visitor::emit_access(bool is_write, * or 32 depending on the number of columns. */ assert(matrix_columns <= 4); - unsigned matrix_stride = glsl_align(matrix_columns * N, 16); + unsigned matrix_stride = 0; + /* The matrix stride for std430 mat2xY matrices is not rounded up to + * vec4 size. From OpenGL 4.3 spec, section 7.6.2.2 "Standard Uniform + * Block Layout": + * + * "2. If the member is a two- or four-component vector with components + * consuming N basic machine units, the base alignment is 2N or 4N, + * respectively." [...] + * "4. If the member is an array of scalars or vectors, the base alignment + * and array stride are set to match the base alignment of a single array + * element, according to rules (1), (2), and (3), and rounded up to the + * base alignment of a vec4." [...] + * "7. If the member is a row-major matrix with C columns and R rows, the + * matrix is stored identically to an array of R row vectors with C + * components each, according to rule (4)." [...] + * "When using the std430 storage layout, shader storage blocks will be + * laid out in buffer storage identically to uniform and shader storage + * blocks using the std140 layout, except that the base alignment and + * stride of arrays of scalars and vectors in rule 4 and of structures in + * rule 9 are not rounded up to a multiple of the base alignment of a vec4." + */ + if (packing == GLSL_INTERFACE_PACKING_STD430 && matrix_columns == 2) + matrix_stride = 2 * N; + else + matrix_stride = glsl_align(matrix_columns * N, 16); const glsl_type *deref_type = deref->type->base_type == GLSL_TYPE_FLOAT ? glsl_type::float_type : glsl_type::double_type; diff --git a/src/glsl/lower_vec_index_to_cond_assign.cpp b/src/glsl/lower_vec_index_to_cond_assign.cpp index 0c3394a504b..b6238825f8a 100644 --- a/src/glsl/lower_vec_index_to_cond_assign.cpp +++ b/src/glsl/lower_vec_index_to_cond_assign.cpp @@ -88,7 +88,9 @@ ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(void *mem_ exec_list list; /* Store the index to a temporary to avoid reusing its tree.
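Returning briefly to the emit_access() change above: its effect is easy to state numerically, and an illustrative restatement of the branch looks like this (N is the basic machine unit size, 4 for float and 8 for double):

static unsigned matrix_stride(unsigned matrix_columns, unsigned N,
                              int std430)
{
   if (std430 && matrix_columns == 2)
      return 2 * N;                          /* mat2xY: 8 bytes for float */

   return (matrix_columns * N + 15) & ~15u;  /* glsl_align(..., 16) */
}

/* mat2 float: std140 -> 16, std430 -> 8.  mat3/mat4 float: 16 in both.
 * dmat2 (N = 8): std430 -> 16.
 */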
*/ - index = new(base_ir) ir_variable(glsl_type::int_type, + assert(orig_index->type == glsl_type::int_type || + orig_index->type == glsl_type::uint_type); + index = new(base_ir) ir_variable(orig_index->type, "vec_index_tmp_i", ir_var_temporary); list.push_tail(index); diff --git a/src/glsl/lower_vector_insert.cpp b/src/glsl/lower_vector_insert.cpp index 6d7cfa94262..26d31b03c12 100644 --- a/src/glsl/lower_vector_insert.cpp +++ b/src/glsl/lower_vector_insert.cpp @@ -108,9 +108,13 @@ vector_insert_visitor::handle_rvalue(ir_rvalue **rv) factory.emit(assign(temp, expr->operands[0])); factory.emit(assign(src_temp, expr->operands[1])); + assert(expr->operands[2]->type == glsl_type::int_type || + expr->operands[2]->type == glsl_type::uint_type); + for (unsigned i = 0; i < expr->type->vector_elements; i++) { ir_constant *const cmp_index = - new(factory.mem_ctx) ir_constant(int(i)); + ir_constant::zero(factory.mem_ctx, expr->operands[2]->type); + cmp_index->value.u[0] = i; ir_variable *const cmp_result = factory.make_temp(glsl_type::bool_type, "index_condition"); diff --git a/src/glsl/builtin_type_macros.h b/src/glsl/nir/builtin_type_macros.h index 8e16ae45489..8e16ae45489 100644 --- a/src/glsl/builtin_type_macros.h +++ b/src/glsl/nir/builtin_type_macros.h diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index 6bedb4eb8e6..e57e834d948 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -152,11 +152,13 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, if (sh->Program->SamplersUsed & (1 << i)) num_textures = i; - shader->info.name = ralloc_asprintf(shader, "GLSL%d", sh->Name); + shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name); + if (shader_prog->Label) + shader->info.label = ralloc_strdup(shader, shader_prog->Label); shader->info.num_textures = num_textures; shader->info.num_ubos = sh->NumUniformBlocks; shader->info.num_abos = shader_prog->NumAtomicBuffers; - shader->info.num_ssbos = shader_prog->NumBufferInterfaceBlocks; + shader->info.num_ssbos = sh->NumShaderStorageBlocks; shader->info.num_images = sh->NumImages; shader->info.inputs_read = sh->Program->InputsRead; shader->info.outputs_written = sh->Program->OutputsWritten; @@ -164,11 +166,37 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, shader->info.uses_texture_gather = sh->Program->UsesGather; shader->info.uses_clip_distance_out = sh->Program->UsesClipDistanceOut; shader->info.separate_shader = shader_prog->SeparateShader; - shader->info.gs.vertices_out = sh->Geom.VerticesOut; - shader->info.gs.invocations = sh->Geom.Invocations; shader->info.has_transform_feedback_varyings = shader_prog->TransformFeedback.NumVarying > 0; + switch (stage) { + case MESA_SHADER_GEOMETRY: + shader->info.gs.vertices_out = sh->Geom.VerticesOut; + shader->info.gs.invocations = sh->Geom.Invocations; + break; + + case MESA_SHADER_FRAGMENT: { + struct gl_fragment_program *fp = + (struct gl_fragment_program *)sh->Program; + + shader->info.fs.uses_discard = fp->UsesKill; + shader->info.fs.early_fragment_tests = sh->EarlyFragmentTests; + shader->info.fs.depth_layout = fp->FragDepthLayout; + break; + } + + case MESA_SHADER_COMPUTE: { + struct gl_compute_program *cp = (struct gl_compute_program *)sh->Program; + shader->info.cs.local_size[0] = cp->LocalSize[0]; + shader->info.cs.local_size[1] = cp->LocalSize[1]; + shader->info.cs.local_size[2] = cp->LocalSize[2]; + break; + } + + default: + break; /* No stage-specific info */ + } + return shader; } @@ -393,35 +421,10 @@ 
nir_visitor::visit(ir_variable *ir) var->interface_type = ir->get_interface_type(); - switch (var->data.mode) { - case nir_var_local: - exec_list_push_tail(&impl->locals, &var->node); - break; - - case nir_var_global: - exec_list_push_tail(&shader->globals, &var->node); - break; - - case nir_var_shader_in: - exec_list_push_tail(&shader->inputs, &var->node); - break; - - case nir_var_shader_out: - exec_list_push_tail(&shader->outputs, &var->node); - break; - - case nir_var_uniform: - case nir_var_shader_storage: - exec_list_push_tail(&shader->uniforms, &var->node); - break; - - case nir_var_system_value: - exec_list_push_tail(&shader->system_values, &var->node); - break; - - default: - unreachable("not reached"); - } + if (var->data.mode == nir_var_local) + nir_function_impl_add_variable(impl, var); + else + nir_shader_add_variable(shader, var); _mesa_hash_table_insert(var_table, ir, var); this->var = var; @@ -695,9 +698,21 @@ nir_visitor::visit(ir_call *ir) } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_xor_internal") == 0) { op = nir_intrinsic_ssbo_atomic_xor; } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_min_internal") == 0) { - op = nir_intrinsic_ssbo_atomic_min; + assert(ir->return_deref); + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_ssbo_atomic_imin; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_ssbo_atomic_umin; + else + unreachable("Invalid type"); } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_max_internal") == 0) { - op = nir_intrinsic_ssbo_atomic_max; + assert(ir->return_deref); + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_ssbo_atomic_imax; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_ssbo_atomic_umax; + else + unreachable("Invalid type"); } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_exchange_internal") == 0) { op = nir_intrinsic_ssbo_atomic_exchange; } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_comp_swap_internal") == 0) { @@ -906,8 +921,10 @@ nir_visitor::visit(ir_call *ir) break; } case nir_intrinsic_ssbo_atomic_add: - case nir_intrinsic_ssbo_atomic_min: - case nir_intrinsic_ssbo_atomic_max: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_umax: case nir_intrinsic_ssbo_atomic_and: case nir_intrinsic_ssbo_atomic_or: case nir_intrinsic_ssbo_atomic_xor: @@ -2065,13 +2082,10 @@ nir_visitor::visit(ir_constant *ir) * constant initializer and return a dereference. */ - nir_variable *var = ralloc(this->shader, nir_variable); - var->name = ralloc_strdup(var, "const_temp"); - var->type = ir->type; - var->data.mode = nir_var_local; + nir_variable *var = + nir_local_variable_create(this->impl, ir->type, "const_temp"); var->data.read_only = true; var->constant_initializer = constant_copy(ir, var); - exec_list_push_tail(&this->impl->locals, &var->node); this->deref_head = nir_deref_var_create(this->shader, var); this->deref_tail = &this->deref_head->deref; diff --git a/src/glsl/glsl_types.cpp b/src/glsl/nir/glsl_types.cpp index 9ef2fbf2525..309f9dca61e 100644 --- a/src/glsl/glsl_types.cpp +++ b/src/glsl/nir/glsl_types.cpp @@ -1175,7 +1175,22 @@ glsl_type::record_location_offset(unsigned length) const const glsl_type *wa = st->without_array(); if (wa->is_record()) { unsigned r_offset = wa->record_location_offset(wa->length); - offset += st->is_array() ? 
st->length * r_offset : r_offset; + offset += st->is_array() ? + st->arrays_of_arrays_size() * r_offset : r_offset; + } else if (st->is_array() && st->fields.array->is_array()) { + unsigned outer_array_size = st->length; + const glsl_type *base_type = st->fields.array; + + /* For arrays of arrays the outer arrays take up a uniform + * slot for each element. The innermost array elements share a + * single slot so we ignore the innermost array when calculating + * the offset. + */ + while (base_type->fields.array->is_array()) { + outer_array_size = outer_array_size * base_type->length; + base_type = base_type->fields.array; + } + offset += outer_array_size; } else { /* We don't worry about arrays here because unless the array * contains a structure or another array it only takes up a single @@ -1419,8 +1434,8 @@ glsl_type::std140_size(bool row_major) const unsigned int array_len; if (this->is_array()) { - element_type = this->fields.array; - array_len = this->length; + element_type = this->without_array(); + array_len = this->arrays_of_arrays_size(); } else { element_type = this; array_len = 1; } @@ -1453,12 +1468,13 @@ glsl_type::std140_size(bool row_major) const * the array are laid out in order, according to rule (9). */ if (this->is_array()) { - if (this->fields.array->is_record()) { - return this->length * this->fields.array->std140_size(row_major); + if (this->without_array()->is_record()) { + return this->arrays_of_arrays_size() * + this->without_array()->std140_size(row_major); } else { - unsigned element_base_align = - this->fields.array->std140_base_alignment(row_major); - return this->length * MAX2(element_base_align, 16); + unsigned element_base_align = + this->without_array()->std140_base_alignment(row_major); + return this->arrays_of_arrays_size() * MAX2(element_base_align, 16); } } @@ -1818,3 +1834,17 @@ glsl_type::coordinate_components() const return size; } + +/** + * Declarations of type flyweights (glsl_type::_foo_type) and + * convenience pointers (glsl_type::foo_type). + * @{ + */ +#define DECL_TYPE(NAME, ...)
\ + const glsl_type glsl_type::_##NAME##_type = glsl_type(__VA_ARGS__, #NAME); \ + const glsl_type *const glsl_type::NAME##_type = &glsl_type::_##NAME##_type; + +#define STRUCT_TYPE(NAME) + +#include "builtin_type_macros.h" +/** @} */ diff --git a/src/glsl/glsl_types.h b/src/glsl/nir/glsl_types.h index b83e1ca3d2c..b83e1ca3d2c 100644 --- a/src/glsl/glsl_types.h +++ b/src/glsl/nir/glsl_types.h diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index e12da805281..793bdafb54b 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -103,6 +103,72 @@ nir_reg_remove(nir_register *reg) exec_node_remove(®->node); } +void +nir_shader_add_variable(nir_shader *shader, nir_variable *var) +{ + switch (var->data.mode) { + case nir_var_local: + assert(!"nir_shader_add_variable cannot be used for local variables"); + break; + + case nir_var_global: + exec_list_push_tail(&shader->globals, &var->node); + break; + + case nir_var_shader_in: + exec_list_push_tail(&shader->inputs, &var->node); + break; + + case nir_var_shader_out: + exec_list_push_tail(&shader->outputs, &var->node); + break; + + case nir_var_uniform: + case nir_var_shader_storage: + exec_list_push_tail(&shader->uniforms, &var->node); + break; + + case nir_var_system_value: + exec_list_push_tail(&shader->system_values, &var->node); + break; + } +} + +nir_variable * +nir_variable_create(nir_shader *shader, nir_variable_mode mode, + const struct glsl_type *type, const char *name) +{ + nir_variable *var = rzalloc(shader, nir_variable); + var->name = ralloc_strdup(var, name); + var->type = type; + var->data.mode = mode; + + if ((mode == nir_var_shader_in && shader->stage != MESA_SHADER_VERTEX) || + (mode == nir_var_shader_out && shader->stage != MESA_SHADER_FRAGMENT)) + var->data.interpolation = INTERP_QUALIFIER_SMOOTH; + + if (mode == nir_var_shader_in || mode == nir_var_uniform) + var->data.read_only = true; + + nir_shader_add_variable(shader, var); + + return var; +} + +nir_variable * +nir_local_variable_create(nir_function_impl *impl, + const struct glsl_type *type, const char *name) +{ + nir_variable *var = rzalloc(impl->overload->function->shader, nir_variable); + var->name = ralloc_strdup(var, name); + var->type = type; + var->data.mode = nir_var_local; + + nir_function_impl_add_variable(impl, var); + + return var; +} + nir_function * nir_function_create(nir_shader *shader, const char *name) { @@ -1080,31 +1146,33 @@ nir_src_as_const_value(nir_src src) return &load->value; } +/** + * Returns true if the source is known to be dynamically uniform. Otherwise it + * returns false which means it may or may not be dynamically uniform but it + * can't be determined. 
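The helper that follows is deliberately conservative: it proves uniformity only for constants and uniform loads and answers false for everything else. A hypothetical caller, not part of this commit and with a made-up name, would use it to decide whether an indirect index can be trusted as-is:

#include "nir.h"

/* Hypothetical pass code: a dynamically uniform index can address a
 * resource directly; anything else may need per-invocation handling
 * (e.g. a lowering loop) on hardware that requires uniform indices.
 */
static bool
index_needs_lowering(nir_src idx)
{
   return !nir_src_is_dynamically_uniform(idx);
}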
+ */ bool -nir_srcs_equal(nir_src src1, nir_src src2) +nir_src_is_dynamically_uniform(nir_src src) { - if (src1.is_ssa) { - if (src2.is_ssa) { - return src1.ssa == src2.ssa; - } else { - return false; - } - } else { - if (src2.is_ssa) { - return false; - } else { - if ((src1.reg.indirect == NULL) != (src2.reg.indirect == NULL)) - return false; + if (!src.is_ssa) + return false; - if (src1.reg.indirect) { - if (!nir_srcs_equal(*src1.reg.indirect, *src2.reg.indirect)) - return false; - } + /* Constants are trivially dynamically uniform */ + if (src.ssa->parent_instr->type == nir_instr_type_load_const) + return true; - return src1.reg.reg == src2.reg.reg && - src1.reg.base_offset == src2.reg.base_offset; - } + /* As are uniform variables */ + if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr); + + if (intr->intrinsic == nir_intrinsic_load_uniform) + return true; } + + /* XXX: this could have many more tests, such as when a sampler function is + * called with dynamically uniform arguments. + */ + return false; } static void diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index f7b9483d74a..825c34805c4 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -35,7 +35,7 @@ #include "util/set.h" #include "util/bitset.h" #include "nir_types.h" -#include "glsl/shader_enums.h" +#include "shader_enums.h" #include <stdio.h> #include "nir_opcodes.h" @@ -738,7 +738,7 @@ nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel) * used for a source */ static inline unsigned -nir_ssa_alu_instr_src_components(nir_alu_instr *instr, unsigned src) +nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src) { assert(instr->dest.dest.is_ssa); @@ -1486,6 +1486,9 @@ typedef struct nir_shader_compiler_options { typedef struct nir_shader_info { const char *name; + /* Descriptive name provided by the client; may be NULL */ + const char *label; + /* Number of textures used by this shader */ unsigned num_textures; /* Number of uniform buffers used by this shader */ @@ -1516,13 +1519,32 @@ typedef struct nir_shader_info { /** Was this shader linked with any transform feedback varyings? */ bool has_transform_feedback_varyings; - struct { - /** The maximum number of vertices the geometry shader might write. */ - unsigned vertices_out; + union { + struct { + /** The maximum number of vertices the geometry shader might write. */ + unsigned vertices_out; + + /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */ + unsigned invocations; + } gs; + + struct { + bool uses_discard; + + /** + * Whether early fragment tests are enabled as defined by + * ARB_shader_image_load_store. + */ + bool early_fragment_tests; + + /** gl_FragDepth layout for ARB_conservative_depth. */ + enum gl_frag_depth_layout depth_layout; + } fs; - /** 1 .. 
MAX_GEOMETRY_SHADER_INVOCATIONS */ - unsigned invocations; - } gs; + struct { + unsigned local_size[3]; + } cs; + }; } nir_shader_info; typedef struct nir_shader { @@ -1585,6 +1607,26 @@ nir_register *nir_local_reg_create(nir_function_impl *impl); void nir_reg_remove(nir_register *reg); +/** Adds a variable to the appropriate list in nir_shader */ +void nir_shader_add_variable(nir_shader *shader, nir_variable *var); + +static inline void +nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var) +{ + assert(var->data.mode == nir_var_local); + exec_list_push_tail(&impl->locals, &var->node); +} + +/** creates a variable, sets a few defaults, and adds it to the list */ +nir_variable *nir_variable_create(nir_shader *shader, + nir_variable_mode mode, + const struct glsl_type *type, + const char *name); +/** creates a local variable and adds it to the list */ +nir_variable *nir_local_variable_create(nir_function_impl *impl, + const struct glsl_type *type, + const char *name); + /** creates a function and adds it to the shader's list of functions */ nir_function *nir_function_create(nir_shader *shader, const char *name); @@ -1821,6 +1863,7 @@ bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state); bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state); nir_const_value *nir_src_as_const_value(nir_src src); +bool nir_src_is_dynamically_uniform(nir_src src); bool nir_srcs_equal(nir_src src1, nir_src src2); void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src); void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src); diff --git a/src/glsl/nir/nir_constant_expressions.py b/src/glsl/nir/nir_constant_expressions.py index 8fd9b1039a7..2ba8554645d 100644 --- a/src/glsl/nir/nir_constant_expressions.py +++ b/src/glsl/nir/nir_constant_expressions.py @@ -29,6 +29,7 @@ template = """\ #include <math.h> #include "main/core.h" #include "util/rounding.h" /* for _mesa_roundeven */ +#include "util/half_float.h" #include "nir_constant_expressions.h" #if defined(__SUNPRO_CC) diff --git a/src/glsl/nir/nir_instr_set.c b/src/glsl/nir/nir_instr_set.c new file mode 100644 index 00000000000..d3f939fe805 --- /dev/null +++ b/src/glsl/nir/nir_instr_set.c @@ -0,0 +1,519 @@ +/* + * Copyright © 2014 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE.
+ */ + +#include "nir_instr_set.h" +#include "nir_vla.h" + +#define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data)) + +static uint32_t +hash_src(uint32_t hash, const nir_src *src) +{ + assert(src->is_ssa); + hash = HASH(hash, src->ssa); + return hash; +} + +static uint32_t +hash_alu_src(uint32_t hash, const nir_alu_src *src, unsigned num_components) +{ + hash = HASH(hash, src->abs); + hash = HASH(hash, src->negate); + + for (unsigned i = 0; i < num_components; i++) + hash = HASH(hash, src->swizzle[i]); + + hash = hash_src(hash, &src->src); + return hash; +} + +static uint32_t +hash_alu(uint32_t hash, const nir_alu_instr *instr) +{ + hash = HASH(hash, instr->op); + hash = HASH(hash, instr->dest.dest.ssa.num_components); + + if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { + assert(nir_op_infos[instr->op].num_inputs == 2); + uint32_t hash0 = hash_alu_src(hash, &instr->src[0], + nir_ssa_alu_instr_src_components(instr, 0)); + uint32_t hash1 = hash_alu_src(hash, &instr->src[1], + nir_ssa_alu_instr_src_components(instr, 1)); + /* For commutative operations, we need some commutative way of + * combining the hashes. One option would be to XOR them but that + * means that anything with two identical sources will hash to 0 and + * that's common enough we probably don't want the guaranteed + * collision. Either addition or multiplication will also work. + */ + hash = hash0 * hash1; + } else { + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { + hash = hash_alu_src(hash, &instr->src[i], + nir_ssa_alu_instr_src_components(instr, i)); + } + } + + return hash; +} + +static uint32_t +hash_load_const(uint32_t hash, const nir_load_const_instr *instr) +{ + hash = HASH(hash, instr->def.num_components); + + hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f, + instr->def.num_components + * sizeof(instr->value.f[0])); + + return hash; +} + +static int +cmp_phi_src(const void *data1, const void *data2) +{ + nir_phi_src *src1 = *(nir_phi_src **)data1; + nir_phi_src *src2 = *(nir_phi_src **)data2; + return src1->pred - src2->pred; +} + +static uint32_t +hash_phi(uint32_t hash, const nir_phi_instr *instr) +{ + hash = HASH(hash, instr->instr.block); + + /* sort sources by predecessor, since the order shouldn't matter */ + unsigned num_preds = instr->instr.block->predecessors->entries; + NIR_VLA(nir_phi_src *, srcs, num_preds); + unsigned i = 0; + nir_foreach_phi_src(instr, src) { + srcs[i++] = src; + } + + qsort(srcs, num_preds, sizeof(nir_phi_src *), cmp_phi_src); + + for (i = 0; i < num_preds; i++) { + hash = hash_src(hash, &srcs[i]->src); + hash = HASH(hash, srcs[i]->pred); + } + + return hash; +} + +static uint32_t +hash_intrinsic(uint32_t hash, const nir_intrinsic_instr *instr) +{ + const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; + hash = HASH(hash, instr->intrinsic); + + if (info->has_dest) + hash = HASH(hash, instr->dest.ssa.num_components); + + assert(info->num_variables == 0); + + hash = _mesa_fnv32_1a_accumulate_block(hash, instr->const_index, + info->num_indices + * sizeof(instr->const_index[0])); + return hash; +} + +static uint32_t +hash_tex(uint32_t hash, const nir_tex_instr *instr) +{ + hash = HASH(hash, instr->op); + hash = HASH(hash, instr->num_srcs); + + for (unsigned i = 0; i < instr->num_srcs; i++) { + hash = HASH(hash, instr->src[i].src_type); + hash = hash_src(hash, &instr->src[i].src); + } + + hash = HASH(hash, instr->coord_components); + hash = HASH(hash, instr->sampler_dim); + hash = HASH(hash, 
instr->is_array); + hash = HASH(hash, instr->is_shadow); + hash = HASH(hash, instr->is_new_style_shadow); + hash = HASH(hash, instr->const_offset); + unsigned component = instr->component; + hash = HASH(hash, component); + hash = HASH(hash, instr->sampler_index); + hash = HASH(hash, instr->sampler_array_size); + + assert(!instr->sampler); + + return hash; +} + +/* Computes a hash of an instruction for use in a hash table. Note that this + * will only work for instructions where instr_can_rewrite() returns true, and + * it should return identical hashes for two instructions that are the same + * according to nir_instrs_equal(). + */ + +static uint32_t +hash_instr(const void *data) +{ + const nir_instr *instr = data; + uint32_t hash = _mesa_fnv32_1a_offset_bias; + + switch (instr->type) { + case nir_instr_type_alu: + hash = hash_alu(hash, nir_instr_as_alu(instr)); + break; + case nir_instr_type_load_const: + hash = hash_load_const(hash, nir_instr_as_load_const(instr)); + break; + case nir_instr_type_phi: + hash = hash_phi(hash, nir_instr_as_phi(instr)); + break; + case nir_instr_type_intrinsic: + hash = hash_intrinsic(hash, nir_instr_as_intrinsic(instr)); + break; + case nir_instr_type_tex: + hash = hash_tex(hash, nir_instr_as_tex(instr)); + break; + default: + unreachable("Invalid instruction type"); + } + + return hash; +} + +bool +nir_srcs_equal(nir_src src1, nir_src src2) +{ + if (src1.is_ssa) { + if (src2.is_ssa) { + return src1.ssa == src2.ssa; + } else { + return false; + } + } else { + if (src2.is_ssa) { + return false; + } else { + if ((src1.reg.indirect == NULL) != (src2.reg.indirect == NULL)) + return false; + + if (src1.reg.indirect) { + if (!nir_srcs_equal(*src1.reg.indirect, *src2.reg.indirect)) + return false; + } + + return src1.reg.reg == src2.reg.reg && + src1.reg.base_offset == src2.reg.base_offset; + } + } +} + +static bool +nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2, + unsigned src1, unsigned src2) +{ + if (alu1->src[src1].abs != alu2->src[src2].abs || + alu1->src[src1].negate != alu2->src[src2].negate) + return false; + + for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) { + if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i]) + return false; + } + + return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src); +} + +/* Returns "true" if two instructions are equal. Note that this will only + * work for the subset of instructions defined by instr_can_rewrite(). Also, + * it should only return "true" for instructions that hash_instr() will return + * the same hash for (ignoring collisions, of course).
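That consistency contract is also why hash_alu() above multiplies the two per-source hashes for commutative opcodes instead of XORing them: XOR would send every ALU op whose two sources are identical to the same combined value. A quick stand-alone demonstration (the two constants are arbitrary sample hashes):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint32_t a = 0x9e3779b9u, b = 0x85ebca6bu; /* two source hashes */

   /* XOR collapses any (x, x) pair to 0, so fadd(u, u) and fmul(v, v)
    * would collide for every u and v.
    */
   printf("xor: %" PRIu32 " %" PRIu32 "\n", a ^ a, b ^ b); /* 0 0 */

   /* Multiplication stays commutative but keeps the pairs distinct
    * (unsigned overflow wraps, which is fine for hashing).
    */
   printf("mul: %" PRIu32 " %" PRIu32 "\n", a * a, b * b);
   printf("commutative: %d\n", a * b == b * a);            /* 1 */
   return 0;
}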
+ */ + +static bool +nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) +{ + if (instr1->type != instr2->type) + return false; + + switch (instr1->type) { + case nir_instr_type_alu: { + nir_alu_instr *alu1 = nir_instr_as_alu(instr1); + nir_alu_instr *alu2 = nir_instr_as_alu(instr2); + + if (alu1->op != alu2->op) + return false; + + /* TODO: We can probably actually do something more intelligent such + * as allowing different numbers and taking a maximum or something + * here */ + if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components) + return false; + + if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { + assert(nir_op_infos[alu1->op].num_inputs == 2); + return (nir_alu_srcs_equal(alu1, alu2, 0, 0) && + nir_alu_srcs_equal(alu1, alu2, 1, 1)) || + (nir_alu_srcs_equal(alu1, alu2, 0, 1) && + nir_alu_srcs_equal(alu1, alu2, 1, 0)); + } else { + for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) { + if (!nir_alu_srcs_equal(alu1, alu2, i, i)) + return false; + } + } + return true; + } + case nir_instr_type_tex: { + nir_tex_instr *tex1 = nir_instr_as_tex(instr1); + nir_tex_instr *tex2 = nir_instr_as_tex(instr2); + + if (tex1->op != tex2->op) + return false; + + if (tex1->num_srcs != tex2->num_srcs) + return false; + for (unsigned i = 0; i < tex1->num_srcs; i++) { + if (tex1->src[i].src_type != tex2->src[i].src_type || + !nir_srcs_equal(tex1->src[i].src, tex2->src[i].src)) { + return false; + } + } + + if (tex1->coord_components != tex2->coord_components || + tex1->sampler_dim != tex2->sampler_dim || + tex1->is_array != tex2->is_array || + tex1->is_shadow != tex2->is_shadow || + tex1->is_new_style_shadow != tex2->is_new_style_shadow || + memcmp(tex1->const_offset, tex2->const_offset, + sizeof(tex1->const_offset)) != 0 || + tex1->component != tex2->component || + tex1->sampler_index != tex2->sampler_index || + tex1->sampler_array_size != tex2->sampler_array_size) { + return false; + } + + /* Don't support un-lowered sampler derefs currently.
+      assert(!tex1->sampler && !tex2->sampler);
+
+      return true;
+   }
+   case nir_instr_type_load_const: {
+      nir_load_const_instr *load1 = nir_instr_as_load_const(instr1);
+      nir_load_const_instr *load2 = nir_instr_as_load_const(instr2);
+
+      if (load1->def.num_components != load2->def.num_components)
+         return false;
+
+      return memcmp(load1->value.f, load2->value.f,
+                    load1->def.num_components * sizeof(*load2->value.f)) == 0;
+   }
+   case nir_instr_type_phi: {
+      nir_phi_instr *phi1 = nir_instr_as_phi(instr1);
+      nir_phi_instr *phi2 = nir_instr_as_phi(instr2);
+
+      if (phi1->instr.block != phi2->instr.block)
+         return false;
+
+      nir_foreach_phi_src(phi1, src1) {
+         nir_foreach_phi_src(phi2, src2) {
+            if (src1->pred == src2->pred) {
+               if (!nir_srcs_equal(src1->src, src2->src))
+                  return false;
+
+               break;
+            }
+         }
+      }
+
+      return true;
+   }
+   case nir_instr_type_intrinsic: {
+      nir_intrinsic_instr *intrinsic1 = nir_instr_as_intrinsic(instr1);
+      nir_intrinsic_instr *intrinsic2 = nir_instr_as_intrinsic(instr2);
+      const nir_intrinsic_info *info =
+         &nir_intrinsic_infos[intrinsic1->intrinsic];
+
+      if (intrinsic1->intrinsic != intrinsic2->intrinsic ||
+          intrinsic1->num_components != intrinsic2->num_components)
+         return false;
+
+      if (info->has_dest && intrinsic1->dest.ssa.num_components !=
+                            intrinsic2->dest.ssa.num_components)
+         return false;
+
+      for (unsigned i = 0; i < info->num_srcs; i++) {
+         if (!nir_srcs_equal(intrinsic1->src[i], intrinsic2->src[i]))
+            return false;
+      }
+
+      assert(info->num_variables == 0);
+
+      for (unsigned i = 0; i < info->num_indices; i++) {
+         if (intrinsic1->const_index[i] != intrinsic2->const_index[i])
+            return false;
+      }
+
+      return true;
+   }
+   case nir_instr_type_call:
+   case nir_instr_type_jump:
+   case nir_instr_type_ssa_undef:
+   case nir_instr_type_parallel_copy:
+   default:
+      unreachable("Invalid instruction type");
+   }
+
+   return false;
+}
+
+static bool
+src_is_ssa(nir_src *src, void *data)
+{
+   (void) data;
+   return src->is_ssa;
+}
+
+static bool
+dest_is_ssa(nir_dest *dest, void *data)
+{
+   (void) data;
+   return dest->is_ssa;
+}
+
+/* This function determines if uses of an instruction can safely be rewritten
+ * to use another identical instruction instead. Note that this function must
+ * be kept in sync with hash_instr() and nir_instrs_equal() -- only
+ * instructions that pass this test will be handed on to those functions, and
+ * conversely they must handle everything that this function returns true for.
+ */
+
+static bool
+instr_can_rewrite(nir_instr *instr)
+{
+   /* We only handle SSA. */
+   if (!nir_foreach_dest(instr, dest_is_ssa, NULL) ||
+       !nir_foreach_src(instr, src_is_ssa, NULL))
+      return false;
+
+   switch (instr->type) {
+   case nir_instr_type_alu:
+   case nir_instr_type_load_const:
+   case nir_instr_type_phi:
+      return true;
+   case nir_instr_type_tex: {
+      nir_tex_instr *tex = nir_instr_as_tex(instr);
+
+      /* Don't support un-lowered sampler derefs currently. */
+      if (tex->sampler)
+         return false;
+
+      return true;
+   }
+   case nir_instr_type_intrinsic: {
+      const nir_intrinsic_info *info =
+         &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
+      return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
+             (info->flags & NIR_INTRINSIC_CAN_REORDER) &&
+             info->num_variables == 0; /* not implemented yet */
+   }
+   case nir_instr_type_call:
+   case nir_instr_type_jump:
+   case nir_instr_type_ssa_undef:
+      return false;
+   case nir_instr_type_parallel_copy:
+   default:
+      unreachable("Invalid instruction type");
+   }
+
+   return false;
+}
+
+static nir_ssa_def *
+nir_instr_get_dest_ssa_def(nir_instr *instr)
+{
+   switch (instr->type) {
+   case nir_instr_type_alu:
+      assert(nir_instr_as_alu(instr)->dest.dest.is_ssa);
+      return &nir_instr_as_alu(instr)->dest.dest.ssa;
+   case nir_instr_type_load_const:
+      return &nir_instr_as_load_const(instr)->def;
+   case nir_instr_type_phi:
+      assert(nir_instr_as_phi(instr)->dest.is_ssa);
+      return &nir_instr_as_phi(instr)->dest.ssa;
+   case nir_instr_type_intrinsic:
+      assert(nir_instr_as_intrinsic(instr)->dest.is_ssa);
+      return &nir_instr_as_intrinsic(instr)->dest.ssa;
+   case nir_instr_type_tex:
+      assert(nir_instr_as_tex(instr)->dest.is_ssa);
+      return &nir_instr_as_tex(instr)->dest.ssa;
+   default:
+      unreachable("We never ask for any of these");
+   }
+}
+
+static bool
+cmp_func(const void *data1, const void *data2)
+{
+   return nir_instrs_equal(data1, data2);
+}
+
+struct set *
+nir_instr_set_create(void *mem_ctx)
+{
+   return _mesa_set_create(mem_ctx, hash_instr, cmp_func);
+}
+
+void
+nir_instr_set_destroy(struct set *instr_set)
+{
+   _mesa_set_destroy(instr_set, NULL);
+}
+
+bool
+nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr)
+{
+   if (!instr_can_rewrite(instr))
+      return false;
+
+   struct set_entry *entry = _mesa_set_search(instr_set, instr);
+   if (entry) {
+      nir_ssa_def *def = nir_instr_get_dest_ssa_def(instr);
+      nir_ssa_def *new_def =
+         nir_instr_get_dest_ssa_def((nir_instr *) entry->key);
+      nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def));
+      return true;
+   }
+
+   _mesa_set_add(instr_set, instr);
+   return false;
+}
+
+void
+nir_instr_set_remove(struct set *instr_set, nir_instr *instr)
+{
+   if (!instr_can_rewrite(instr))
+      return;
+
+   struct set_entry *entry = _mesa_set_search(instr_set, instr);
+   if (entry)
+      _mesa_set_remove(instr_set, entry);
+}
+
diff --git a/src/glsl/nir/nir_instr_set.h b/src/glsl/nir/nir_instr_set.h
new file mode 100644
index 00000000000..939e8ddbf58
--- /dev/null
+++ b/src/glsl/nir/nir_instr_set.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include "nir.h"
+
+/**
+ * This file defines functions for creating, destroying, and manipulating an
+ * "instruction set," which is an abstraction for finding duplicate
+ * instructions using a hash set. Note that the question of whether an
+ * instruction is actually a duplicate (e.g. whether it has any side effects)
+ * is handled transparently. The user can pass any instruction to
+ * nir_instr_set_add_or_rewrite() and nir_instr_set_remove(), and if the
+ * instruction isn't safe to rewrite or isn't supported, it's silently
+ * ignored.
+ */
+
+/*@{*/
+
+/** Creates an instruction set, using a given ralloc mem_ctx */
+struct set *nir_instr_set_create(void *mem_ctx);
+
+/** Destroys an instruction set. */
+void nir_instr_set_destroy(struct set *instr_set);
+
+/**
+ * Adds an instruction to an instruction set if it doesn't exist, or if it
+ * does already exist, rewrites all uses of it to point to the other
+ * already-inserted instruction. Returns 'true' if the uses of the instruction
+ * were rewritten.
+ */
+bool nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr);
+
+/**
+ * Removes an instruction from an instruction set, so that other instructions
+ * won't be merged with it.
+ */
+void nir_instr_set_remove(struct set *instr_set, nir_instr *instr);
+
+/*@}*/
+
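Since the header above only declares the API, a minimal usage sketch may help. This is illustrative code, not part of the patch; the pass name is invented, and it assumes the NIR block/instruction iterators available at this point in the tree:

   /* Fold duplicates within a single block: each instruction is either
    * added to the set or has its uses rewritten to an earlier, identical
    * instruction, after which the now-dead copy can be removed.
    */
   static bool
   dedup_block(nir_block *block)
   {
      bool progress = false;
      struct set *instr_set = nir_instr_set_create(NULL);

      nir_foreach_instr_safe(block, instr) {
         if (nir_instr_set_add_or_rewrite(instr_set, instr)) {
            /* All uses now point at the earlier copy. */
            nir_instr_remove(instr);
            progress = true;
         }
      }

      nir_instr_set_destroy(instr_set);
      return progress;
   }
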
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index b5a0d715aa3..68a18b9c11a 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -174,8 +174,10 @@ INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0,
  * 3: For CompSwap only: the second data parameter.
  */
 INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
-INTRINSIC(ssbo_atomic_min, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
-INTRINSIC(ssbo_atomic_max, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
 INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
 INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
 INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
diff --git a/src/glsl/nir/nir_lower_atomics.c b/src/glsl/nir/nir_lower_atomics.c
index 6f9ecc019ec..46e137652a1 100644
--- a/src/glsl/nir/nir_lower_atomics.c
+++ b/src/glsl/nir/nir_lower_atomics.c
@@ -72,20 +72,22 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
 
    nir_ssa_def *offset_def = &offset_const->def;
 
-   if (instr->variables[0]->deref.child != NULL) {
-      assert(instr->variables[0]->deref.child->deref_type ==
-             nir_deref_type_array);
-      nir_deref_array *deref_array =
-         nir_deref_as_array(instr->variables[0]->deref.child);
-      assert(deref_array->deref.child == NULL);
+   nir_deref *tail = &instr->variables[0]->deref;
+   while (tail->child != NULL) {
+      assert(tail->child->deref_type == nir_deref_type_array);
+      nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+      tail = tail->child;
 
-      offset_const->value.u[0] +=
-         deref_array->base_offset * ATOMIC_COUNTER_SIZE;
+      unsigned child_array_elements = tail->child != NULL ?
+         glsl_get_aoa_size(tail->type) : 1;
+
+      offset_const->value.u[0] += deref_array->base_offset *
+         child_array_elements * ATOMIC_COUNTER_SIZE;
 
       if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
          nir_load_const_instr *atomic_counter_size =
            nir_load_const_instr_create(mem_ctx, 1);
-         atomic_counter_size->value.u[0] = ATOMIC_COUNTER_SIZE;
+         atomic_counter_size->value.u[0] = child_array_elements * ATOMIC_COUNTER_SIZE;
          nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr);
 
          nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
@@ -102,7 +104,7 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
          add->src[0].src.is_ssa = true;
          add->src[0].src.ssa = &mul->dest.dest.ssa;
          add->src[1].src.is_ssa = true;
-         add->src[1].src.ssa = &offset_const->def;
+         add->src[1].src.ssa = offset_def;
          nir_instr_insert_before(&instr->instr, &add->instr);
 
          offset_def = &add->dest.dest.ssa;
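To make the offset arithmetic above concrete: the loop walks every array deref level and scales each index by the number of counters beneath it. As a worked example (the shader declaration is hypothetical; the 4-byte counter size matches ATOMIC_COUNTER_SIZE in this file), consider "layout(binding = 0) uniform atomic_uint c[2][3];". For a constant access c[i][j], the lowering folds the whole chain into one offset, equivalent to this illustrative helper (not part of the patch):

   /* Flat byte offset of c[i][j] in a 2x3 atomic counter array. */
   static unsigned
   flat_counter_offset(unsigned i, unsigned j)
   {
      const unsigned counter_size = 4; /* ATOMIC_COUNTER_SIZE */
      /* Outer index: 3 counters live below each c[i], which is what
       * glsl_get_aoa_size() reports for the remaining dimensions. */
      unsigned off = i * 3 * counter_size;
      /* Innermost index: no children, so child_array_elements == 1. */
      off += j * 1 * counter_size;
      return off; /* e.g. c[1][2] -> 20 bytes */
   }
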
diff --git a/src/glsl/nir/nir_opt_cse.c b/src/glsl/nir/nir_opt_cse.c
index 64c94afd480..93a6635337a 100644
--- a/src/glsl/nir/nir_opt_cse.c
+++ b/src/glsl/nir/nir_opt_cse.c
@@ -22,306 +22,60 @@
  *
  * Authors:
  *    Jason Ekstrand ([email protected])
+ *    Connor Abbott ([email protected])
  *
  */
 
-#include "nir.h"
+#include "nir_instr_set.h"
 
 /*
  * Implements common subexpression elimination
  */
 
-struct cse_state {
-   void *mem_ctx;
-   bool progress;
-};
-
-static bool
-nir_alu_srcs_equal(nir_alu_instr *alu1, nir_alu_instr *alu2, unsigned src1,
-                   unsigned src2)
-{
-   if (alu1->src[src1].abs != alu2->src[src2].abs ||
-       alu1->src[src1].negate != alu2->src[src2].negate)
-      return false;
-
-   for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) {
-      if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i])
-         return false;
-   }
-
-   return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src);
-}
-
-static bool
-nir_instrs_equal(nir_instr *instr1, nir_instr *instr2)
-{
-   if (instr1->type != instr2->type)
-      return false;
-
-   switch (instr1->type) {
-   case nir_instr_type_alu: {
-      nir_alu_instr *alu1 = nir_instr_as_alu(instr1);
-      nir_alu_instr *alu2 = nir_instr_as_alu(instr2);
-
-      if (alu1->op != alu2->op)
-         return false;
-
-      /* TODO: We can probably acutally do something more inteligent such
-       * as allowing different numbers and taking a maximum or something
-       * here */
-      if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
-         return false;
-
-      if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
-         assert(nir_op_infos[alu1->op].num_inputs == 2);
-         return (nir_alu_srcs_equal(alu1, alu2, 0, 0) &&
-                 nir_alu_srcs_equal(alu1, alu2, 1, 1)) ||
-                (nir_alu_srcs_equal(alu1, alu2, 0, 1) &&
-                 nir_alu_srcs_equal(alu1, alu2, 1, 0));
-      } else {
-         for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) {
-            if (!nir_alu_srcs_equal(alu1, alu2, i, i))
-               return false;
-         }
-      }
-      return true;
-   }
-   case nir_instr_type_tex: {
-      nir_tex_instr *tex1 = nir_instr_as_tex(instr1);
-      nir_tex_instr *tex2 = nir_instr_as_tex(instr2);
-
-      if (tex1->op != tex2->op)
-         return false;
-
-      if (tex1->num_srcs != tex2->num_srcs)
-         return false;
-      for (unsigned i = 0; i < tex1->num_srcs; i++) {
-         if (tex1->src[i].src_type != tex2->src[i].src_type ||
-             !nir_srcs_equal(tex1->src[i].src, tex2->src[i].src)) {
-            return false;
-         }
-      }
-
-      if (tex1->coord_components != tex2->coord_components ||
-          tex1->sampler_dim != tex2->sampler_dim ||
-          tex1->is_array != tex2->is_array ||
-          tex1->is_shadow != tex2->is_shadow ||
-          tex1->is_new_style_shadow != tex2->is_new_style_shadow ||
-          memcmp(tex1->const_offset, tex2->const_offset,
-                 sizeof(tex1->const_offset)) != 0 ||
-          tex1->component != tex2->component ||
-          tex1->sampler_index != tex2->sampler_index ||
-          tex1->sampler_array_size != tex2->sampler_array_size) {
-         return false;
-      }
-
-      /* Don't support un-lowered sampler derefs currently. */
-      if (tex1->sampler || tex2->sampler)
-         return false;
-
-      return true;
-   }
-   case nir_instr_type_load_const: {
-      nir_load_const_instr *load1 = nir_instr_as_load_const(instr1);
-      nir_load_const_instr *load2 = nir_instr_as_load_const(instr2);
-
-      if (load1->def.num_components != load2->def.num_components)
-         return false;
-
-      return memcmp(load1->value.f, load2->value.f,
-                    load1->def.num_components * sizeof(*load2->value.f)) == 0;
-   }
-   case nir_instr_type_phi: {
-      nir_phi_instr *phi1 = nir_instr_as_phi(instr1);
-      nir_phi_instr *phi2 = nir_instr_as_phi(instr2);
-
-      if (phi1->instr.block != phi2->instr.block)
-         return false;
-
-      nir_foreach_phi_src(phi1, src1) {
-         nir_foreach_phi_src(phi2, src2) {
-            if (src1->pred == src2->pred) {
-               if (!nir_srcs_equal(src1->src, src2->src))
-                  return false;
-
-               break;
-            }
-         }
-      }
-
-      return true;
-   }
-   case nir_instr_type_intrinsic: {
-      nir_intrinsic_instr *intrinsic1 = nir_instr_as_intrinsic(instr1);
-      nir_intrinsic_instr *intrinsic2 = nir_instr_as_intrinsic(instr2);
-      const nir_intrinsic_info *info =
-         &nir_intrinsic_infos[intrinsic1->intrinsic];
-
-      if (intrinsic1->intrinsic != intrinsic2->intrinsic ||
-          intrinsic1->num_components != intrinsic2->num_components)
-         return false;
-
-      if (info->has_dest && intrinsic1->dest.ssa.num_components !=
-                            intrinsic2->dest.ssa.num_components)
-         return false;
-
-      for (unsigned i = 0; i < info->num_srcs; i++) {
-         if (!nir_srcs_equal(intrinsic1->src[i], intrinsic2->src[i]))
-            return false;
-      }
-
-      assert(info->num_variables == 0);
-
-      for (unsigned i = 0; i < info->num_indices; i++) {
-         if (intrinsic1->const_index[i] != intrinsic2->const_index[i])
-            return false;
-      }
-
-      return true;
-   }
-   case nir_instr_type_call:
-   case nir_instr_type_jump:
-   case nir_instr_type_ssa_undef:
-   case nir_instr_type_parallel_copy:
-   default:
-      unreachable("Invalid instruction type");
-   }
-
-   return false;
-}
-
-static bool
-src_is_ssa(nir_src *src, void *data)
-{
-   (void) data;
-   return src->is_ssa;
-}
-
-static bool
-dest_is_ssa(nir_dest *dest, void *data)
-{
-   (void) data;
-   return dest->is_ssa;
-}
+/*
+ * Visits and CSE's the given block and all its descendants in the dominance
+ * tree recursively. Note that the instr_set is guaranteed to only ever
+ * contain instructions that dominate the current block.
+ */
 
 static bool
-nir_instr_can_cse(nir_instr *instr)
-{
-   /* We only handle SSA. */
-   if (!nir_foreach_dest(instr, dest_is_ssa, NULL) ||
-       !nir_foreach_src(instr, src_is_ssa, NULL))
-      return false;
-
-   switch (instr->type) {
-   case nir_instr_type_alu:
-   case nir_instr_type_tex:
-   case nir_instr_type_load_const:
-   case nir_instr_type_phi:
-      return true;
-   case nir_instr_type_intrinsic: {
-      const nir_intrinsic_info *info =
-         &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
-      return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
-             (info->flags & NIR_INTRINSIC_CAN_REORDER) &&
-             info->num_variables == 0; /* not implemented yet */
-   }
-   case nir_instr_type_call:
-   case nir_instr_type_jump:
-   case nir_instr_type_ssa_undef:
-      return false;
-   case nir_instr_type_parallel_copy:
-   default:
-      unreachable("Invalid instruction type");
-   }
-
-   return false;
-}
-
-static nir_ssa_def *
-nir_instr_get_dest_ssa_def(nir_instr *instr)
+cse_block(nir_block *block, struct set *instr_set)
 {
-   switch (instr->type) {
-   case nir_instr_type_alu:
-      assert(nir_instr_as_alu(instr)->dest.dest.is_ssa);
-      return &nir_instr_as_alu(instr)->dest.dest.ssa;
-   case nir_instr_type_tex:
-      assert(nir_instr_as_tex(instr)->dest.is_ssa);
-      return &nir_instr_as_tex(instr)->dest.ssa;
-   case nir_instr_type_load_const:
-      return &nir_instr_as_load_const(instr)->def;
-   case nir_instr_type_phi:
-      assert(nir_instr_as_phi(instr)->dest.is_ssa);
-      return &nir_instr_as_phi(instr)->dest.ssa;
-   case nir_instr_type_intrinsic:
-      assert(nir_instr_as_intrinsic(instr)->dest.is_ssa);
-      return &nir_instr_as_intrinsic(instr)->dest.ssa;
-   default:
-      unreachable("We never ask for any of these");
-   }
-}
-
-static void
-nir_opt_cse_instr(nir_instr *instr, struct cse_state *state)
-{
-   if (!nir_instr_can_cse(instr))
-      return;
+   bool progress = false;
 
-   for (struct exec_node *node = instr->node.prev;
-        !exec_node_is_head_sentinel(node); node = node->prev) {
-      nir_instr *other = exec_node_data(nir_instr, node, node);
-      if (nir_instrs_equal(instr, other)) {
-         nir_ssa_def *other_def = nir_instr_get_dest_ssa_def(other);
-         nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr),
-                                  nir_src_for_ssa(other_def));
+   nir_foreach_instr_safe(block, instr) {
+      if (nir_instr_set_add_or_rewrite(instr_set, instr)) {
+         progress = true;
          nir_instr_remove(instr);
-         state->progress = true;
-         return;
       }
    }
 
-   for (nir_block *block = instr->block->imm_dom;
-        block != NULL; block = block->imm_dom) {
-      nir_foreach_instr_reverse(block, other) {
-         if (nir_instrs_equal(instr, other)) {
-            nir_ssa_def *other_def = nir_instr_get_dest_ssa_def(other);
-            nir_ssa_def_rewrite_uses(nir_instr_get_dest_ssa_def(instr),
-                                     nir_src_for_ssa(other_def));
-            nir_instr_remove(instr);
-            state->progress = true;
-            return;
-         }
-      }
+   for (unsigned i = 0; i < block->num_dom_children; i++) {
+      nir_block *child = block->dom_children[i];
+      progress |= cse_block(child, instr_set);
    }
-}
-
-static bool
-nir_opt_cse_block(nir_block *block, void *void_state)
-{
-   struct cse_state *state = void_state;
 
-   nir_foreach_instr_safe(block, instr)
-      nir_opt_cse_instr(instr, state);
+   nir_foreach_instr(block, instr)
+      nir_instr_set_remove(instr_set, instr);
 
-   return true;
+   return progress;
 }
 
 static bool
 nir_opt_cse_impl(nir_function_impl *impl)
 {
-   struct cse_state state;
-
-   state.mem_ctx = ralloc_parent(impl);
-   state.progress = false;
+   struct set *instr_set = nir_instr_set_create(NULL);
 
    nir_metadata_require(impl, nir_metadata_dominance);
 
-   nir_foreach_block(impl, nir_opt_cse_block, &state);
+   bool progress = cse_block(nir_start_block(impl), instr_set);
 
-   if (state.progress)
+   if (progress)
       nir_metadata_preserve(impl, nir_metadata_block_index |
                                   nir_metadata_dominance);
 
-   return state.progress;
+   nir_instr_set_destroy(instr_set);
+   return progress;
 }
 
 bool
@@ -336,3 +90,4 @@ nir_opt_cse(nir_shader *shader)
 
    return progress;
 }
+
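Because cse_block() only ever consults instructions that dominate the current block, a single top-down walk of the dominance tree suffices, and callers just invoke the pass entry point. A typical fixed-point driver loop might look like this (illustrative only; the companion nir_opt_dce() call and the in-scope nir_shader pointer are assumptions, not part of this patch):

   /* Rerun CSE until it stops making progress: merging one pair of
    * instructions can expose further duplicates downstream.
    */
   bool progress;
   do {
      progress = false;
      progress |= nir_opt_cse(shader);
      progress |= nir_opt_dce(shader); /* clean up the dead copies */
   } while (progress);
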
diff --git a/src/glsl/nir/nir_sweep.c b/src/glsl/nir/nir_sweep.c
index b6ce43b5224..5a22f509f50 100644
--- a/src/glsl/nir/nir_sweep.c
+++ b/src/glsl/nir/nir_sweep.c
@@ -155,6 +155,8 @@ nir_sweep(nir_shader *nir)
    ralloc_adopt(rubbish, nir);
 
    ralloc_steal(nir, (char *)nir->info.name);
+   if (nir->info.label)
+      ralloc_steal(nir, (char *)nir->info.label);
 
    /* Variables and registers are not dead.  Steal them back. */
    steal_list(nir, nir_variable, &nir->uniforms);
diff --git a/src/glsl/nir/nir_types.cpp b/src/glsl/nir/nir_types.cpp
index 01f0e9b5abc..4a1250e546c 100644
--- a/src/glsl/nir/nir_types.cpp
+++ b/src/glsl/nir/nir_types.cpp
@@ -118,6 +118,12 @@ glsl_get_length(const struct glsl_type *type)
    return type->is_matrix() ? type->matrix_columns : type->length;
 }
 
+unsigned
+glsl_get_aoa_size(const struct glsl_type *type)
+{
+   return type->arrays_of_arrays_size();
+}
+
 const char *
 glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index)
 {
diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h
index 1a0cb1fb774..a61af6cba75 100644
--- a/src/glsl/nir/nir_types.h
+++ b/src/glsl/nir/nir_types.h
@@ -31,7 +31,7 @@
 
 /* C wrapper around glsl_types.h */
 
-#include "../glsl_types.h"
+#include "glsl_types.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -65,6 +65,8 @@ unsigned glsl_get_matrix_columns(const struct glsl_type *type);
 
 unsigned glsl_get_length(const struct glsl_type *type);
 
+unsigned glsl_get_aoa_size(const struct glsl_type *type);
+
 const char *glsl_get_struct_elem_name(const struct glsl_type *type,
                                       unsigned index);
 
diff --git a/src/glsl/shader_enums.c b/src/glsl/nir/shader_enums.c
index c196b791d4f..66a25e72344 100644
--- a/src/glsl/shader_enums.c
+++ b/src/glsl/nir/shader_enums.c
@@ -26,8 +26,9 @@
  *    Rob Clark <[email protected]>
  */
 
-#include "glsl/shader_enums.h"
+#include "shader_enums.h"
 #include "util/macros.h"
+#include "mesa/main/config.h"
 
 #define ENUM(x) [x] = #x
 #define NAME(val) ((((val) < ARRAY_SIZE(names)) && names[(val)]) ? names[(val)] : "UNKNOWN")
@@ -42,6 +43,7 @@ const char * gl_shader_stage_name(gl_shader_stage stage)
       ENUM(MESA_SHADER_FRAGMENT),
       ENUM(MESA_SHADER_COMPUTE),
    };
+   STATIC_ASSERT(ARRAY_SIZE(names) == MESA_SHADER_STAGES);
    return NAME(stage);
 }
 
@@ -82,6 +84,7 @@ const char * gl_vert_attrib_name(gl_vert_attrib attrib)
       ENUM(VERT_ATTRIB_GENERIC14),
       ENUM(VERT_ATTRIB_GENERIC15),
    };
+   STATIC_ASSERT(ARRAY_SIZE(names) == VERT_ATTRIB_MAX);
    return NAME(attrib);
 }
 
@@ -147,6 +150,7 @@ const char * gl_varying_slot_name(gl_varying_slot slot)
      ENUM(VARYING_SLOT_VAR30),
      ENUM(VARYING_SLOT_VAR31),
    };
+   STATIC_ASSERT(ARRAY_SIZE(names) == VARYING_SLOT_MAX);
    return NAME(slot);
 }
 
@@ -169,8 +173,10 @@ const char * gl_system_value_name(gl_system_value sysval)
      ENUM(SYSTEM_VALUE_TESS_LEVEL_INNER),
      ENUM(SYSTEM_VALUE_LOCAL_INVOCATION_ID),
      ENUM(SYSTEM_VALUE_WORK_GROUP_ID),
+     ENUM(SYSTEM_VALUE_NUM_WORK_GROUPS),
      ENUM(SYSTEM_VALUE_VERTEX_CNT),
    };
+   STATIC_ASSERT(ARRAY_SIZE(names) == SYSTEM_VALUE_MAX);
    return NAME(sysval);
 }
 
@@ -182,6 +188,7 @@ const char * glsl_interp_qualifier_name(enum glsl_interp_qualifier qual)
       ENUM(INTERP_QUALIFIER_FLAT),
       ENUM(INTERP_QUALIFIER_NOPERSPECTIVE),
    };
+   STATIC_ASSERT(ARRAY_SIZE(names) == INTERP_QUALIFIER_COUNT);
    return NAME(qual);
 }
 
@@ -201,5 +208,6 @@ const char * gl_frag_result_name(gl_frag_result result)
       ENUM(FRAG_RESULT_DATA6),
       ENUM(FRAG_RESULT_DATA7),
    };
+   STATIC_ASSERT(ARRAY_SIZE(names) == FRAG_RESULT_MAX);
    return NAME(result);
 }
diff --git a/src/glsl/shader_enums.h b/src/glsl/nir/shader_enums.h
index 2a5d2c5bfa7..d1cf7ca04cc 100644
--- a/src/glsl/shader_enums.h
+++ b/src/glsl/nir/shader_enums.h
@@ -233,6 +233,11 @@ typedef enum
    VARYING_SLOT_VAR31,
 } gl_varying_slot;
 
+
+#define VARYING_SLOT_MAX (VARYING_SLOT_VAR0 + MAX_VARYING)
+#define VARYING_SLOT_PATCH0 (VARYING_SLOT_MAX)
+#define VARYING_SLOT_TESS_MAX (VARYING_SLOT_PATCH0 + MAX_VARYING)
+
 const char * gl_varying_slot_name(gl_varying_slot slot);
 
 /**
@@ -473,4 +478,23 @@ typedef enum
 
 const char * gl_frag_result_name(gl_frag_result result);
 
+#define FRAG_RESULT_MAX (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS)
+
+/**
+ * \brief Layout qualifiers for gl_FragDepth.
+ *
+ * Extension AMD_conservative_depth allows gl_FragDepth to be redeclared with
+ * a layout qualifier.
+ *
+ * \see enum ir_depth_layout
+ */
+enum gl_frag_depth_layout
+{
+   FRAG_DEPTH_LAYOUT_NONE, /**< No layout is specified. */
+   FRAG_DEPTH_LAYOUT_ANY,
+   FRAG_DEPTH_LAYOUT_GREATER,
+   FRAG_DEPTH_LAYOUT_LESS,
+   FRAG_DEPTH_LAYOUT_UNCHANGED
+};
+
 #endif /* SHADER_ENUMS_H */
diff --git a/src/glsl/opt_dead_code.cpp b/src/glsl/opt_dead_code.cpp
index 2cb7f41adef..c5be166e75a 100644
--- a/src/glsl/opt_dead_code.cpp
+++ b/src/glsl/opt_dead_code.cpp
@@ -75,24 +75,35 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned)
           || !entry->declaration)
          continue;
 
-      if (entry->assign) {
-         /* Remove a single dead assignment to the variable we found.
-          * Don't do so if it's a shader or function output or a shader
-          * storage variable though.
+      if (!entry->assign_list.is_empty()) {
+         /* Remove all the dead assignments to the variable we found.
+          * Don't do so if it's a shader or function output, though.
          */
         if (entry->var->data.mode != ir_var_function_out &&
             entry->var->data.mode != ir_var_function_inout &&
             entry->var->data.mode != ir_var_shader_out &&
             entry->var->data.mode != ir_var_shader_storage) {
-           entry->assign->remove();
-           progress = true;
 
-           if (debug) {
-              printf("Removed assignment to %s@%p\n",
-                     entry->var->name, (void *) entry->var);
-           }
+           while (!entry->assign_list.is_empty()) {
+              struct assignment_entry *assignment_entry =
+                 exec_node_data(struct assignment_entry,
+                                entry->assign_list.head, link);
+
+              assignment_entry->assign->remove();
+
+              if (debug) {
+                 printf("Removed assignment to %s@%p\n",
+                        entry->var->name, (void *) entry->var);
+              }
+
+              assignment_entry->link.remove();
+              free(assignment_entry);
+           }
+           progress = true;
         }
-      } else {
+      }
+
+      if (entry->assign_list.is_empty()) {
         /* If there are no assignments or references to the variable left,
          * then we can remove its declaration.
          */
@@ -103,7 +114,7 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned)
          */
         if (entry->var->data.mode == ir_var_uniform ||
             entry->var->data.mode == ir_var_shader_storage) {
-           if (uniform_locations_assigned || entry->var->constant_value)
+           if (uniform_locations_assigned || entry->var->constant_initializer)
               continue;
 
            /* Section 2.11.6 (Uniform Variables) of the OpenGL ES 3.0.3 spec
diff --git a/src/glsl/opt_tree_grafting.cpp b/src/glsl/opt_tree_grafting.cpp
index a7a219c55ca..e38a0e93058 100644
--- a/src/glsl/opt_tree_grafting.cpp
+++ b/src/glsl/opt_tree_grafting.cpp
@@ -373,8 +373,6 @@ tree_grafting_basic_block(ir_instruction *bb_first,
          entry->referenced_count != 2)
         continue;
 
-      assert(assign == entry->assign);
-
       /* Found a possibly graftable assignment.  Now, walk through the
        * rest of the BB seeing if the deref is here, and if nothing interfered with
        * pasting its expression's values in between.
diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp
index 05140192893..3a95360eda6 100644
--- a/src/glsl/standalone_scaffolding.cpp
+++ b/src/glsl/standalone_scaffolding.cpp
@@ -113,9 +113,18 @@ _mesa_clear_shader_program_data(struct gl_shader_program *shProg)
    ralloc_free(shProg->InfoLog);
    shProg->InfoLog = ralloc_strdup(shProg, "");
 
+   ralloc_free(shProg->BufferInterfaceBlocks);
+   shProg->BufferInterfaceBlocks = NULL;
+   shProg->NumBufferInterfaceBlocks = 0;
+
    ralloc_free(shProg->UniformBlocks);
    shProg->UniformBlocks = NULL;
-   shProg->NumBufferInterfaceBlocks = 0;
+   shProg->NumUniformBlocks = 0;
+
+   ralloc_free(shProg->ShaderStorageBlocks);
+   shProg->ShaderStorageBlocks = NULL;
+   shProg->NumShaderStorageBlocks = 0;
+
    for (i = 0; i < MESA_SHADER_STAGES; i++) {
       ralloc_free(shProg->UniformBlockStageIndex[i]);
       shProg->UniformBlockStageIndex[i] = NULL;