| author | Jason Ekstrand <[email protected]> | 2015-12-10 16:58:24 -0800 |
|---|---|---|
| committer | Jason Ekstrand <[email protected]> | 2015-12-10 18:29:36 -0800 |
| commit | d5c9955d3eaa7311e2b2350b6964bae516c7b7b2 (patch) | |
| tree | 401e4378dd9909f9010f076d026a77e515170be5 /src/glsl | |
| parent | 8beea9d45b5879ea3dbd9c0e48f0c0eb2451f380 (diff) | |
| parent | 78b81be627734ea7fa50ea246c07b0d4a3a1638a (diff) | |
Merge remote-tracking branch 'mesa-public/master' into vulkan
This pulls in nir_intrinsic_load/store changes and the switch of all
uniforms in i965 to bytes. This accounts for the Vulkan changes.
Diffstat (limited to 'src/glsl')
48 files changed, 2037 insertions, 981 deletions
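A large part of the diff below generalizes the GLSL atomic built-ins from SSBO-only helpers (`_atomic_ssbo_*`, gated on `shader_storage_buffer_object`) to plain `_atomic_*` helpers gated on the new `buffer_atomics_supported` predicate, and adds a `lower_shared_reference` pass so the same built-ins work on compute-shader `shared` variables. As a minimal sketch of the shader-level feature this enables (the snippet is illustrative only and is not part of the diff), a compute shader can now pass a shared variable as the first argument of `atomicAdd`:

```glsl
#version 310 es
layout(local_size_x = 64) in;

// Hypothetical example: a workgroup-local counter held in a shared
// variable, used directly as the first argument to atomicAdd().
shared uint counter;

layout(std430, binding = 0) buffer Result {
    uint total;
};

void main()
{
    if (gl_LocalInvocationIndex == 0u)
        counter = 0u;
    barrier();

    // Previously only buffer (SSBO) variables were accepted here.
    atomicAdd(counter, 1u);

    barrier();
    if (gl_LocalInvocationIndex == 0u)
        total = counter;
}
```

Before this change, `verify_first_atomic_ssbo_parameter` rejected any first argument that was not a buffer variable; the renamed `verify_first_atomic_parameter` in the diff also accepts variables with `ir_var_shader_shared` mode.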
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 0c9fd75d206..e64c31e17c6 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -160,6 +160,8 @@ LIBGLSL_FILES = \ loop_analysis.h \ loop_controls.cpp \ loop_unroll.cpp \ + lower_buffer_access.cpp \ + lower_buffer_access.h \ lower_clip_distance.cpp \ lower_const_arrays_to_uniforms.cpp \ lower_discard.cpp \ @@ -184,6 +186,7 @@ LIBGLSL_FILES = \ lower_vector_insert.cpp \ lower_vertex_id.cpp \ lower_output_reads.cpp \ + lower_shared_reference.cpp \ lower_ubo_reference.cpp \ opt_algebraic.cpp \ opt_array_splitting.cpp \ diff --git a/src/glsl/ast.h b/src/glsl/ast.h index 3bea63ea0ed..adfc7938bff 100644 --- a/src/glsl/ast.h +++ b/src/glsl/ast.h @@ -699,16 +699,16 @@ struct ast_type_qualifier { bool merge_qualifier(YYLTYPE *loc, _mesa_glsl_parse_state *state, - ast_type_qualifier q); + const ast_type_qualifier &q); bool merge_out_qualifier(YYLTYPE *loc, _mesa_glsl_parse_state *state, - ast_type_qualifier q, + const ast_type_qualifier &q, ast_node* &node); bool merge_in_qualifier(YYLTYPE *loc, _mesa_glsl_parse_state *state, - ast_type_qualifier q, + const ast_type_qualifier &q, ast_node* &node); ast_subroutine_list *subroutine_list; @@ -1152,7 +1152,7 @@ class ast_cs_input_layout : public ast_node { public: ast_cs_input_layout(const struct YYLTYPE &locp, - ast_layout_expression **local_size) + ast_layout_expression *const *local_size) { for (int i = 0; i < 3; i++) { this->local_size[i] = local_size[i]; @@ -1197,6 +1197,6 @@ check_builtin_array_max_size(const char *name, unsigned size, extern void _mesa_ast_process_interface_block(YYLTYPE *locp, _mesa_glsl_parse_state *state, ast_interface_block *const block, - const struct ast_type_qualifier q); + const struct ast_type_qualifier &q); #endif /* AST_H */ diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp index 466ece67424..e32a588f091 100644 --- a/src/glsl/ast_function.cpp +++ b/src/glsl/ast_function.cpp @@ -143,19 +143,21 @@ verify_image_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state, } static bool -verify_first_atomic_ssbo_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state, +verify_first_atomic_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state, ir_variable *var) { - if (!var || !var->is_in_shader_storage_block()) { + if (!var || + (!var->is_in_shader_storage_block() && + var->data.mode != ir_var_shader_shared)) { _mesa_glsl_error(loc, state, "First argument to atomic function " - "must be a buffer variable"); + "must be a buffer or shared variable"); return false; } return true; } static bool -is_atomic_ssbo_function(const char *func_name) +is_atomic_function(const char *func_name) { return !strcmp(func_name, "atomicAdd") || !strcmp(func_name, "atomicMin") || @@ -276,16 +278,16 @@ verify_parameter_modes(_mesa_glsl_parse_state *state, /* The first parameter of atomic functions must be a buffer variable */ const char *func_name = sig->function_name(); - bool is_atomic_ssbo = is_atomic_ssbo_function(func_name); - if (is_atomic_ssbo) { + bool is_atomic = is_atomic_function(func_name); + if (is_atomic) { const ir_rvalue *const actual = (ir_rvalue *) actual_ir_parameters.head; const ast_expression *const actual_ast = exec_node_data(ast_expression, actual_ast_parameters.head, link); YYLTYPE loc = actual_ast->get_location(); - if (!verify_first_atomic_ssbo_parameter(&loc, state, - actual->variable_referenced())) { + if (!verify_first_atomic_parameter(&loc, state, + actual->variable_referenced())) { return false; } } @@ -1737,7 +1739,7 
@@ ast_function_expression::handle_method(exec_list *instructions, result = new(ctx) ir_constant(op->type->array_size()); } } else if (op->type->is_vector()) { - if (state->ARB_shading_language_420pack_enable) { + if (state->has_420pack()) { /* .length() returns int. */ result = new(ctx) ir_constant((int) op->type->vector_elements); } else { @@ -1746,7 +1748,7 @@ ast_function_expression::handle_method(exec_list *instructions, goto fail; } } else if (op->type->is_matrix()) { - if (state->ARB_shading_language_420pack_enable) { + if (state->has_420pack()) { /* .length() returns int. */ result = new(ctx) ir_constant((int) op->type->matrix_columns); } else { @@ -2075,7 +2077,7 @@ ast_aggregate_initializer::hir(exec_list *instructions, } const glsl_type *const constructor_type = this->constructor_type; - if (!state->ARB_shading_language_420pack_enable) { + if (!state->has_420pack()) { _mesa_glsl_error(&loc, state, "C-style initialization requires the " "GL_ARB_shading_language_420pack extension"); return ir_rvalue::error_value(ctx); diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 52881a4da7a..fc6bb3e31f1 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -1825,7 +1825,7 @@ ast_expression::do_hir(exec_list *instructions, * tree. This particular use must be at location specified in the grammar * as 'variable_identifier'. */ - ir_variable *var = + ir_variable *var = state->symbols->get_variable(this->primary_expression.identifier); if (var != NULL) { @@ -2650,7 +2650,9 @@ apply_explicit_binding(struct _mesa_glsl_parse_state *state, return; } - } else if (state->is_version(420, 310) && base_type->is_image()) { + } else if ((state->is_version(420, 310) || + state->ARB_shading_language_420pack_enable) && + base_type->is_image()) { assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS); if (max_index >= ctx->Const.MaxImageUnits) { _mesa_glsl_error(loc, state, "Image binding %d exceeds the " @@ -3737,7 +3739,7 @@ process_initializer(ir_variable *var, ast_declaration *decl, * expressions. Const-qualified global variables must still be * initialized with constant expressions. */ - if (!state->ARB_shading_language_420pack_enable + if (!state->has_420pack() || state->current_function == NULL) { _mesa_glsl_error(& initializer_loc, state, "initializer of %s variable `%s' must be a " @@ -5366,7 +5368,7 @@ ast_jump_statement::hir(exec_list *instructions, if (state->current_function->return_type != ret_type) { YYLTYPE loc = this->get_location(); - if (state->ARB_shading_language_420pack_enable) { + if (state->has_420pack()) { if (!apply_implicit_conversion(state->current_function->return_type, ret, state)) { _mesa_glsl_error(& loc, state, @@ -5558,8 +5560,8 @@ ast_switch_statement::hir(exec_list *instructions, /* From page 66 (page 55 of the PDF) of the GLSL 1.50 spec: * - * "The type of init-expression in a switch statement must be a - * scalar integer." + * "The type of init-expression in a switch statement must be a + * scalar integer." 
*/ if (!test_expression->type->is_scalar() || !test_expression->type->is_integer()) { diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp index 03ed4dcfa2a..8643b7bfb76 100644 --- a/src/glsl/ast_type.cpp +++ b/src/glsl/ast_type.cpp @@ -116,7 +116,7 @@ ast_type_qualifier::interpolation_string() const bool ast_type_qualifier::merge_qualifier(YYLTYPE *loc, _mesa_glsl_parse_state *state, - ast_type_qualifier q) + const ast_type_qualifier &q) { ast_type_qualifier ubo_mat_mask; ubo_mat_mask.flags.i = 0; @@ -293,7 +293,7 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, bool ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc, _mesa_glsl_parse_state *state, - ast_type_qualifier q, + const ast_type_qualifier &q, ast_node* &node) { void *mem_ctx = state; @@ -309,7 +309,7 @@ ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc, bool ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc, _mesa_glsl_parse_state *state, - ast_type_qualifier q, + const ast_type_qualifier &q, ast_node* &node) { void *mem_ctx = state; diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp index 881ee2b6b55..9973a763087 100644 --- a/src/glsl/builtin_functions.cpp +++ b/src/glsl/builtin_functions.cpp @@ -479,6 +479,12 @@ compute_shader(const _mesa_glsl_parse_state *state) } static bool +buffer_atomics_supported(const _mesa_glsl_parse_state *state) +{ + return compute_shader(state) || shader_storage_buffer_object(state); +} + +static bool barrier_supported(const _mesa_glsl_parse_state *state) { return compute_shader(state) || @@ -606,8 +612,8 @@ private: ir_expression_operation opcode, const glsl_type *return_type, const glsl_type *param_type); - ir_function_signature *binop(ir_expression_operation opcode, - builtin_available_predicate avail, + ir_function_signature *binop(builtin_available_predicate avail, + ir_expression_operation opcode, const glsl_type *return_type, const glsl_type *param0_type, const glsl_type *param1_type); @@ -774,16 +780,16 @@ private: ir_function_signature *_atomic_counter_op(const char *intrinsic, builtin_available_predicate avail); - ir_function_signature *_atomic_ssbo_intrinsic2(builtin_available_predicate avail, - const glsl_type *type); - ir_function_signature *_atomic_ssbo_op2(const char *intrinsic, - builtin_available_predicate avail, - const glsl_type *type); - ir_function_signature *_atomic_ssbo_intrinsic3(builtin_available_predicate avail, - const glsl_type *type); - ir_function_signature *_atomic_ssbo_op3(const char *intrinsic, - builtin_available_predicate avail, - const glsl_type *type); + ir_function_signature *_atomic_intrinsic2(builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_atomic_op2(const char *intrinsic, + builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_atomic_intrinsic3(builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_atomic_op3(const char *intrinsic, + builtin_available_predicate avail, + const glsl_type *type); B1(min3) B1(max3) @@ -930,53 +936,53 @@ builtin_builder::create_intrinsics() _atomic_counter_intrinsic(shader_atomic_counters), NULL); - add_function("__intrinsic_ssbo_atomic_add", - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::int_type), - NULL); - add_function("__intrinsic_ssbo_atomic_min", - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::uint_type), - 
_atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::int_type), - NULL); - add_function("__intrinsic_ssbo_atomic_max", - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::int_type), - NULL); - add_function("__intrinsic_ssbo_atomic_and", - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::int_type), - NULL); - add_function("__intrinsic_ssbo_atomic_or", - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::int_type), - NULL); - add_function("__intrinsic_ssbo_atomic_xor", - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::int_type), - NULL); - add_function("__intrinsic_ssbo_atomic_exchange", - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_intrinsic2(shader_storage_buffer_object, - glsl_type::int_type), - NULL); - add_function("__intrinsic_ssbo_atomic_comp_swap", - _atomic_ssbo_intrinsic3(shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_intrinsic3(shader_storage_buffer_object, - glsl_type::int_type), + add_function("__intrinsic_atomic_add", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_min", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_max", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_and", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_or", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_xor", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_exchange", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_comp_swap", + _atomic_intrinsic3(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic3(buffer_atomics_supported, + glsl_type::int_type), NULL); add_image_functions(false); @@ -1336,7 +1342,7 @@ builtin_builder::create_builtins() _smoothstep(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), _smoothstep(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), NULL); - + FD130(isnan) FD130(isinf) @@ -1373,7 +1379,7 @@ builtin_builder::create_builtins() FD(distance) FD(dot) - add_function("cross", _cross(always_available, glsl_type::vec3_type), + add_function("cross", _cross(always_available, glsl_type::vec3_type), _cross(fp64, glsl_type::dvec3_type), NULL); FD(normalize) @@ -2682,68 +2688,68 @@ builtin_builder::create_builtins() NULL); 
add_function("atomicAdd", - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_add", - shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_add", - shader_storage_buffer_object, - glsl_type::int_type), + _atomic_op2("__intrinsic_atomic_add", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_add", + buffer_atomics_supported, + glsl_type::int_type), NULL); add_function("atomicMin", - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_min", - shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_min", - shader_storage_buffer_object, - glsl_type::int_type), + _atomic_op2("__intrinsic_atomic_min", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_min", + buffer_atomics_supported, + glsl_type::int_type), NULL); add_function("atomicMax", - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_max", - shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_max", - shader_storage_buffer_object, - glsl_type::int_type), + _atomic_op2("__intrinsic_atomic_max", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_max", + buffer_atomics_supported, + glsl_type::int_type), NULL); add_function("atomicAnd", - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_and", - shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_and", - shader_storage_buffer_object, - glsl_type::int_type), + _atomic_op2("__intrinsic_atomic_and", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_and", + buffer_atomics_supported, + glsl_type::int_type), NULL); add_function("atomicOr", - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_or", - shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_or", - shader_storage_buffer_object, - glsl_type::int_type), + _atomic_op2("__intrinsic_atomic_or", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_or", + buffer_atomics_supported, + glsl_type::int_type), NULL); add_function("atomicXor", - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_xor", - shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_xor", - shader_storage_buffer_object, - glsl_type::int_type), + _atomic_op2("__intrinsic_atomic_xor", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_xor", + buffer_atomics_supported, + glsl_type::int_type), NULL); add_function("atomicExchange", - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_exchange", - shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_op2("__intrinsic_ssbo_atomic_exchange", - shader_storage_buffer_object, - glsl_type::int_type), + _atomic_op2("__intrinsic_atomic_exchange", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_exchange", + buffer_atomics_supported, + glsl_type::int_type), NULL); add_function("atomicCompSwap", - _atomic_ssbo_op3("__intrinsic_ssbo_atomic_comp_swap", - shader_storage_buffer_object, - glsl_type::uint_type), - _atomic_ssbo_op3("__intrinsic_ssbo_atomic_comp_swap", - shader_storage_buffer_object, - glsl_type::int_type), + _atomic_op3("__intrinsic_atomic_comp_swap", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op3("__intrinsic_atomic_comp_swap", + buffer_atomics_supported, + glsl_type::int_type), NULL); add_function("min3", @@ -3114,8 +3120,8 @@ 
builtin_builder::_##NAME(builtin_available_predicate avail, const glsl_type *typ } ir_function_signature * -builtin_builder::binop(ir_expression_operation opcode, - builtin_available_predicate avail, +builtin_builder::binop(builtin_available_predicate avail, + ir_expression_operation opcode, const glsl_type *return_type, const glsl_type *param0_type, const glsl_type *param1_type) @@ -3411,7 +3417,7 @@ builtin_builder::_atanh(const glsl_type *type) ir_function_signature * builtin_builder::_pow(const glsl_type *type) { - return binop(ir_binop_pow, always_available, type, type, type); + return binop(always_available, ir_binop_pow, type, type, type); } UNOP(exp, ir_unop_exp, always_available) @@ -3435,7 +3441,7 @@ UNOPA(fract, ir_unop_fract) ir_function_signature * builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type) { - return binop(ir_binop_mod, always_available, x_type, x_type, y_type); + return binop(always_available, ir_binop_mod, x_type, x_type, y_type); } ir_function_signature * @@ -3457,14 +3463,14 @@ ir_function_signature * builtin_builder::_min(builtin_available_predicate avail, const glsl_type *x_type, const glsl_type *y_type) { - return binop(ir_binop_min, avail, x_type, x_type, y_type); + return binop(avail, ir_binop_min, x_type, x_type, y_type); } ir_function_signature * builtin_builder::_max(builtin_available_predicate avail, const glsl_type *x_type, const glsl_type *y_type) { - return binop(ir_binop_max, avail, x_type, x_type, y_type); + return binop(avail, ir_binop_max, x_type, x_type, y_type); } ir_function_signature * @@ -3793,9 +3799,9 @@ ir_function_signature * builtin_builder::_dot(builtin_available_predicate avail, const glsl_type *type) { if (type->vector_elements == 1) - return binop(ir_binop_mul, avail, type, type, type); + return binop(avail, ir_binop_mul, type, type, type); - return binop(ir_binop_dot, avail, + return binop(avail, ir_binop_dot, type->get_base_type(), type, type); } @@ -4311,7 +4317,7 @@ ir_function_signature * builtin_builder::_lessThan(builtin_available_predicate avail, const glsl_type *type) { - return binop(ir_binop_less, avail, + return binop(avail, ir_binop_less, glsl_type::bvec(type->vector_elements), type, type); } @@ -4319,7 +4325,7 @@ ir_function_signature * builtin_builder::_lessThanEqual(builtin_available_predicate avail, const glsl_type *type) { - return binop(ir_binop_lequal, avail, + return binop(avail, ir_binop_lequal, glsl_type::bvec(type->vector_elements), type, type); } @@ -4327,7 +4333,7 @@ ir_function_signature * builtin_builder::_greaterThan(builtin_available_predicate avail, const glsl_type *type) { - return binop(ir_binop_greater, avail, + return binop(avail, ir_binop_greater, glsl_type::bvec(type->vector_elements), type, type); } @@ -4335,7 +4341,7 @@ ir_function_signature * builtin_builder::_greaterThanEqual(builtin_available_predicate avail, const glsl_type *type) { - return binop(ir_binop_gequal, avail, + return binop(avail, ir_binop_gequal, glsl_type::bvec(type->vector_elements), type, type); } @@ -4343,7 +4349,7 @@ ir_function_signature * builtin_builder::_equal(builtin_available_predicate avail, const glsl_type *type) { - return binop(ir_binop_equal, avail, + return binop(avail, ir_binop_equal, glsl_type::bvec(type->vector_elements), type, type); } @@ -4351,7 +4357,7 @@ ir_function_signature * builtin_builder::_notEqual(builtin_available_predicate avail, const glsl_type *type) { - return binop(ir_binop_nequal, avail, + return binop(avail, ir_binop_nequal, glsl_type::bvec(type->vector_elements), type, 
type); } @@ -4939,7 +4945,8 @@ builtin_builder::_fma(builtin_available_predicate avail, const glsl_type *type) ir_function_signature * builtin_builder::_ldexp(const glsl_type *x_type, const glsl_type *exp_type) { - return binop(ir_binop_ldexp, x_type->base_type == GLSL_TYPE_DOUBLE ? fp64 : gpu_shader5_or_es31, x_type, x_type, exp_type); + return binop(x_type->base_type == GLSL_TYPE_DOUBLE ? fp64 : gpu_shader5_or_es31, + ir_binop_ldexp, x_type, x_type, exp_type); } ir_function_signature * @@ -5096,8 +5103,8 @@ builtin_builder::_atomic_counter_intrinsic(builtin_available_predicate avail) } ir_function_signature * -builtin_builder::_atomic_ssbo_intrinsic2(builtin_available_predicate avail, - const glsl_type *type) +builtin_builder::_atomic_intrinsic2(builtin_available_predicate avail, + const glsl_type *type) { ir_variable *atomic = in_var(type, "atomic"); ir_variable *data = in_var(type, "data"); @@ -5106,8 +5113,8 @@ builtin_builder::_atomic_ssbo_intrinsic2(builtin_available_predicate avail, } ir_function_signature * -builtin_builder::_atomic_ssbo_intrinsic3(builtin_available_predicate avail, - const glsl_type *type) +builtin_builder::_atomic_intrinsic3(builtin_available_predicate avail, + const glsl_type *type) { ir_variable *atomic = in_var(type, "atomic"); ir_variable *data1 = in_var(type, "data1"); @@ -5131,9 +5138,9 @@ builtin_builder::_atomic_counter_op(const char *intrinsic, } ir_function_signature * -builtin_builder::_atomic_ssbo_op2(const char *intrinsic, - builtin_available_predicate avail, - const glsl_type *type) +builtin_builder::_atomic_op2(const char *intrinsic, + builtin_available_predicate avail, + const glsl_type *type) { ir_variable *atomic = in_var(type, "atomic_var"); ir_variable *data = in_var(type, "atomic_data"); @@ -5147,9 +5154,9 @@ builtin_builder::_atomic_ssbo_op2(const char *intrinsic, } ir_function_signature * -builtin_builder::_atomic_ssbo_op3(const char *intrinsic, - builtin_available_predicate avail, - const glsl_type *type) +builtin_builder::_atomic_op3(const char *intrinsic, + builtin_available_predicate avail, + const glsl_type *type) { ir_variable *atomic = in_var(type, "atomic_var"); ir_variable *data1 = in_var(type, "atomic_data1"); diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index 5a8f98019d1..7eb383ac60c 100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -948,7 +948,7 @@ parameter_qualifier: if (($1.flags.q.in || $1.flags.q.out) && ($2.flags.q.in || $2.flags.q.out)) _mesa_glsl_error(&@1, state, "duplicate in/out/inout qualifier"); - if (!state->has_420pack() && $2.flags.q.constant) + if (!state->has_420pack_or_es31() && $2.flags.q.constant) _mesa_glsl_error(&@1, state, "in/out/inout must come after const " "or precise"); @@ -960,7 +960,7 @@ parameter_qualifier: if ($2.precision != ast_precision_none) _mesa_glsl_error(&@1, state, "duplicate precision qualifier"); - if (!(state->has_420pack() || state->is_version(420, 310)) && + if (!state->has_420pack_or_es31() && $2.flags.i != 0) _mesa_glsl_error(&@1, state, "precision qualifiers must come last"); @@ -1482,7 +1482,7 @@ layout_qualifier_id: $$.index = $3; } - if ((state->has_420pack() || + if ((state->has_420pack_or_es31() || state->has_atomic_counters() || state->has_shader_storage_buffer_objects()) && match_layout_qualifier("binding", $1, state) == 0) { @@ -1714,7 +1714,7 @@ type_qualifier: if ($2.flags.q.invariant) _mesa_glsl_error(&@1, state, "duplicate \"invariant\" qualifier"); - if (!state->has_420pack() && $2.flags.q.precise) + if 
(!state->has_420pack_or_es31() && $2.flags.q.precise) _mesa_glsl_error(&@1, state, "\"invariant\" must come after \"precise\""); @@ -1747,7 +1747,7 @@ type_qualifier: if ($2.has_interpolation()) _mesa_glsl_error(&@1, state, "duplicate interpolation qualifier"); - if (!state->has_420pack() && + if (!state->has_420pack_or_es31() && ($2.flags.q.precise || $2.flags.q.invariant)) { _mesa_glsl_error(&@1, state, "interpolation qualifiers must come " "after \"precise\" or \"invariant\""); @@ -1767,7 +1767,7 @@ type_qualifier: * precise qualifiers since these are useful in ARB_separate_shader_objects. * There is no clear spec guidance on this either. */ - if (!state->has_420pack() && $2.has_layout()) + if (!state->has_420pack_or_es31() && $2.has_layout()) _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); $$ = $1; @@ -1785,7 +1785,7 @@ type_qualifier: "duplicate auxiliary storage qualifier (centroid or sample)"); } - if (!state->has_420pack() && + if (!state->has_420pack_or_es31() && ($2.flags.q.precise || $2.flags.q.invariant || $2.has_interpolation() || $2.has_layout())) { _mesa_glsl_error(&@1, state, "auxiliary storage qualifiers must come " @@ -1803,7 +1803,7 @@ type_qualifier: if ($2.has_storage()) _mesa_glsl_error(&@1, state, "duplicate storage qualifier"); - if (!state->has_420pack() && + if (!state->has_420pack_or_es31() && ($2.flags.q.precise || $2.flags.q.invariant || $2.has_interpolation() || $2.has_layout() || $2.has_auxiliary_storage())) { _mesa_glsl_error(&@1, state, "storage qualifiers must come after " @@ -1819,7 +1819,7 @@ type_qualifier: if ($2.precision != ast_precision_none) _mesa_glsl_error(&@1, state, "duplicate precision qualifier"); - if (!(state->has_420pack() || state->is_version(420, 310)) && + if (!(state->has_420pack_or_es31()) && $2.flags.i != 0) _mesa_glsl_error(&@1, state, "precision qualifiers must come last"); @@ -2575,7 +2575,7 @@ interface_block: { ast_interface_block *block = (ast_interface_block *) $2; - if (!state->has_420pack() && block->layout.has_layout() && + if (!state->has_420pack_or_es31() && block->layout.has_layout() && !block->layout.is_default_qualifier) { _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); YYERROR; diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index b41b64af2c1..3988376ea9d 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -479,7 +479,7 @@ _mesa_glsl_msg(const YYLTYPE *locp, _mesa_glsl_parse_state *state, struct gl_context *ctx = state->ctx; /* Report the error via GL_ARB_debug_output. 
*/ - _mesa_shader_debug(ctx, type, &msg_id, msg, strlen(msg)); + _mesa_shader_debug(ctx, type, &msg_id, msg); ralloc_strcat(&state->info_log, "\n"); } @@ -876,7 +876,7 @@ void _mesa_ast_process_interface_block(YYLTYPE *locp, _mesa_glsl_parse_state *state, ast_interface_block *const block, - const struct ast_type_qualifier q) + const struct ast_type_qualifier &q) { if (q.flags.q.buffer) { if (!state->has_shader_storage_buffer_objects()) { @@ -1088,7 +1088,7 @@ void ast_compound_statement::print(void) const { printf("{\n"); - + foreach_list_typed(ast_node, ast, link, &this->statements) { ast->print(); } @@ -1414,7 +1414,6 @@ ast_selection_statement::print(void) const printf("else "); else_statement->print(); } - } diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index 17ff0b5af79..a4bda772a0f 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -97,7 +97,7 @@ struct _mesa_glsl_parse_state { * supports the feature. * * \param required_glsl_es_version is the GLSL ES version that is required - * to support the feature, or 0 if no version of GLSL ES suports the + * to support the feature, or 0 if no version of GLSL ES supports the * feature. */ bool is_version(unsigned required_glsl_version, @@ -255,6 +255,11 @@ struct _mesa_glsl_parse_state { return ARB_shading_language_420pack_enable || is_version(420, 0); } + bool has_420pack_or_es31() const + { + return ARB_shading_language_420pack_enable || is_version(420, 310); + } + bool has_compute_shader() const { return ARB_compute_shader_enable || is_version(430, 310); diff --git a/src/glsl/hir_field_selection.cpp b/src/glsl/hir_field_selection.cpp index 337095b95b8..92bb4139194 100644 --- a/src/glsl/hir_field_selection.cpp +++ b/src/glsl/hir_field_selection.cpp @@ -57,8 +57,7 @@ _mesa_ast_field_selection_to_hir(const ast_expression *expr, expr->primary_expression.identifier); } } else if (op->type->is_vector() || - (state->ARB_shading_language_420pack_enable && - op->type->is_scalar())) { + (state->has_420pack() && op->type->is_scalar())) { ir_swizzle *swiz = ir_swizzle::create(op, expr->primary_expression.identifier, op->type->vector_elements); diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index ca520f547a1..f989e9b6dff 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -1669,6 +1669,7 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name, this->data.pixel_center_integer = false; this->data.depth_layout = ir_depth_layout_none; this->data.used = false; + this->data.always_active_io = false; this->data.read_only = false; this->data.centroid = false; this->data.sample = false; diff --git a/src/glsl/ir.h b/src/glsl/ir.h index e1109eec1d3..bdc932ef538 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -659,6 +659,13 @@ public: unsigned assigned:1; /** + * When separate shader programs are enabled, only input/outputs between + * the stages of a multi-stage separate program can be safely removed + * from the shader interface. Other input/outputs must remains active. + */ + unsigned always_active_io:1; + + /** * Enum indicating how the variable was declared. See * ir_var_declaration_type. 
* diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp index 67ed3605a8c..ef705851613 100644 --- a/src/glsl/ir_constant_expression.cpp +++ b/src/glsl/ir_constant_expression.cpp @@ -41,14 +41,6 @@ #include "glsl_types.h" #include "program/hash_table.h" -#if defined(__SUNPRO_CC) && !defined(isnormal) -#include <ieeefp.h> -static int isnormal(double x) -{ - return fpclass(x) == FP_NORMAL; -} -#endif - static float dot_f(ir_constant *op0, ir_constant *op1) { diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index 2fee81c09c2..dabd80a8d0d 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -124,6 +124,7 @@ bool lower_const_arrays_to_uniforms(exec_list *instructions); bool lower_clip_distance(gl_shader *shader); void lower_output_reads(unsigned stage, exec_list *instructions); bool lower_packing_builtins(exec_list *instructions, int op_mask); +void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size); void lower_ubo_reference(struct gl_shader *shader); void lower_packed_varyings(void *mem_ctx, unsigned locations_used, ir_variable_mode mode, diff --git a/src/glsl/ir_reader.cpp b/src/glsl/ir_reader.cpp index 07720e28749..7c0af1b712f 100644 --- a/src/glsl/ir_reader.cpp +++ b/src/glsl/ir_reader.cpp @@ -93,7 +93,7 @@ ir_reader::read(exec_list *instructions, const char *src, bool scan_for_protos) ir_read_error(NULL, "couldn't parse S-Expression."); return; } - + if (scan_for_protos) { scan_for_prototypes(instructions, expr); if (state->error) @@ -147,7 +147,7 @@ ir_reader::read_type(s_expression *expr) return glsl_type::get_array_instance(base_type, s_size->value()); } - + s_symbol *type_sym = SX_AS_SYMBOL(expr); if (type_sym == NULL) { ir_read_error(expr, "expected <type>"); diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp index c0b4b3e820c..71750d1b42b 100644 --- a/src/glsl/link_varyings.cpp +++ b/src/glsl/link_varyings.cpp @@ -766,7 +766,7 @@ public: gl_shader_stage consumer_stage); ~varying_matches(); void record(ir_variable *producer_var, ir_variable *consumer_var); - unsigned assign_locations(uint64_t reserved_slots); + unsigned assign_locations(uint64_t reserved_slots, bool separate_shader); void store_locations() const; private: @@ -896,8 +896,10 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) { assert(producer_var != NULL || consumer_var != NULL); - if ((producer_var && !producer_var->data.is_unmatched_generic_inout) - || (consumer_var && !consumer_var->data.is_unmatched_generic_inout)) { + if ((producer_var && (!producer_var->data.is_unmatched_generic_inout || + producer_var->data.explicit_location)) || + (consumer_var && (!consumer_var->data.is_unmatched_generic_inout || + consumer_var->data.explicit_location))) { /* Either a location already exists for this variable (since it is part * of fixed functionality), or it has already been recorded as part of a * previous match. @@ -986,11 +988,36 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) * passed to varying_matches::record(). */ unsigned -varying_matches::assign_locations(uint64_t reserved_slots) +varying_matches::assign_locations(uint64_t reserved_slots, bool separate_shader) { - /* Sort varying matches into an order that makes them easy to pack. 
*/ - qsort(this->matches, this->num_matches, sizeof(*this->matches), - &varying_matches::match_comparator); + /* We disable varying sorting for separate shader programs for the + * following reasons: + * + * 1/ All programs must sort the code in the same order to guarantee the + * interface matching. However varying_matches::record() will change the + * interpolation qualifier of some stages. + * + * 2/ GLSL version 4.50 removes the matching constrain on the interpolation + * qualifier. + * + * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.40 spec: + * + * "The type and presence of interpolation qualifiers of variables with + * the same name declared in all linked shaders for the same cross-stage + * interface must match, otherwise the link command will fail. + * + * When comparing an output from one stage to an input of a subsequent + * stage, the input and output don't match if their interpolation + * qualifiers (or lack thereof) are not the same." + * + * "It is a link-time error if, within the same stage, the interpolation + * qualifiers of variables of the same name do not match." + */ + if (!separate_shader) { + /* Sort varying matches into an order that makes them easy to pack. */ + qsort(this->matches, this->num_matches, sizeof(*this->matches), + &varying_matches::match_comparator); + } unsigned generic_location = 0; unsigned generic_patch_location = MAX_VARYING*4; @@ -1590,7 +1617,8 @@ assign_varying_locations(struct gl_context *ctx, reserved_varying_slot(producer, ir_var_shader_out) | reserved_varying_slot(consumer, ir_var_shader_in); - const unsigned slots_used = matches.assign_locations(reserved_slots); + const unsigned slots_used = matches.assign_locations(reserved_slots, + prog->SeparateShader); matches.store_locations(); for (unsigned i = 0; i < num_tfeedback_decls; ++i) { diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 331d9a28007..a87bbb2b994 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -631,20 +631,12 @@ link_invalidate_variable_locations(exec_list *ir) /* ir_variable::is_unmatched_generic_inout is used by the linker while * connecting outputs from one stage to inputs of the next stage. - * - * There are two implicit assumptions here. First, we assume that any - * built-in variable (i.e., non-generic in or out) will have - * explicit_location set. Second, we assume that any generic in or out - * will not have explicit_location set. - * - * This second assumption will only be valid until - * GL_ARB_separate_shader_objects is supported. When that extension is - * implemented, this function will need some modifications. */ - if (!var->data.explicit_location) { - var->data.is_unmatched_generic_inout = 1; - } else { + if (var->data.explicit_location && + var->data.location < VARYING_SLOT_VAR0) { var->data.is_unmatched_generic_inout = 0; + } else { + var->data.is_unmatched_generic_inout = 1; } } } @@ -2421,6 +2413,7 @@ assign_attribute_or_color_locations(gl_shader_program *prog, continue; if (var->data.explicit_location) { + var->data.is_unmatched_generic_inout = 0; if ((var->data.location >= (int)(max_index + generic_base)) || (var->data.location < 0)) { linker_error(prog, @@ -2690,6 +2683,53 @@ assign_attribute_or_color_locations(gl_shader_program *prog, return true; } +/** + * Match explicit locations of outputs to inputs and deactivate the + * unmatch flag if found so we don't optimise them away. 
+ */ +static void +match_explicit_outputs_to_inputs(struct gl_shader_program *prog, + gl_shader *producer, + gl_shader *consumer) +{ + glsl_symbol_table parameters; + ir_variable *explicit_locations[MAX_VARYING] = { NULL }; + + /* Find all shader outputs in the "producer" stage. + */ + foreach_in_list(ir_instruction, node, producer->ir) { + ir_variable *const var = node->as_variable(); + + if ((var == NULL) || (var->data.mode != ir_var_shader_out)) + continue; + + if (var->data.explicit_location && + var->data.location >= VARYING_SLOT_VAR0) { + const unsigned idx = var->data.location - VARYING_SLOT_VAR0; + if (explicit_locations[idx] == NULL) + explicit_locations[idx] = var; + } + } + + /* Match inputs to outputs */ + foreach_in_list(ir_instruction, node, consumer->ir) { + ir_variable *const input = node->as_variable(); + + if ((input == NULL) || (input->data.mode != ir_var_shader_in)) + continue; + + ir_variable *output = NULL; + if (input->data.explicit_location + && input->data.location >= VARYING_SLOT_VAR0) { + output = explicit_locations[input->data.location - VARYING_SLOT_VAR0]; + + if (output != NULL){ + input->data.is_unmatched_generic_inout = 0; + output->data.is_unmatched_generic_inout = 0; + } + } + } +} /** * Demote shader inputs and outputs that are not used in other stages @@ -3940,6 +3980,77 @@ split_ubos_and_ssbos(void *mem_ctx, assert(*num_ubos + *num_ssbos == num_blocks); } +static void +set_always_active_io(exec_list *ir, ir_variable_mode io_mode) +{ + assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out); + + foreach_in_list(ir_instruction, node, ir) { + ir_variable *const var = node->as_variable(); + + if (var == NULL || var->data.mode != io_mode) + continue; + + /* Don't set always active on builtins that haven't been redeclared */ + if (var->data.how_declared == ir_var_declared_implicitly) + continue; + + var->data.always_active_io = true; + } +} + +/** + * When separate shader programs are enabled, only input/outputs between + * the stages of a multi-stage separate program can be safely removed + * from the shader interface. Other inputs/outputs must remain active. + */ +static void +disable_varying_optimizations_for_sso(struct gl_shader_program *prog) +{ + unsigned first, last; + assert(prog->SeparateShader); + + first = MESA_SHADER_STAGES; + last = 0; + + /* Determine first and last stage. Excluding the compute stage */ + for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { + if (!prog->_LinkedShaders[i]) + continue; + if (first == MESA_SHADER_STAGES) + first = i; + last = i; + } + + if (first == MESA_SHADER_STAGES) + return; + + for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) { + gl_shader *sh = prog->_LinkedShaders[stage]; + if (!sh) + continue; + + if (first == last) { + /* For a single shader program only allow inputs to the vertex shader + * and outputs from the fragment shader to be removed. + */ + if (stage != MESA_SHADER_VERTEX) + set_always_active_io(sh->ir, ir_var_shader_in); + if (stage != MESA_SHADER_FRAGMENT) + set_always_active_io(sh->ir, ir_var_shader_out); + } else { + /* For multi-stage separate shader programs only allow inputs and + * outputs between the shader stages to be removed as well as inputs + * to the vertex shader and outputs from the fragment shader. 
+ */ + if (stage == first && stage != MESA_SHADER_VERTEX) + set_always_active_io(sh->ir, ir_var_shader_in); + else if (stage == last && stage != MESA_SHADER_FRAGMENT) + set_always_active_io(sh->ir, ir_var_shader_out); + } + } +} + void link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) { @@ -4139,11 +4250,18 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) if (!prog->LinkStatus) goto done; - unsigned prev; + unsigned first, last, prev; - for (prev = 0; prev <= MESA_SHADER_FRAGMENT; prev++) { - if (prog->_LinkedShaders[prev] != NULL) - break; + first = MESA_SHADER_STAGES; + last = 0; + + /* Determine first and last stage. */ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (!prog->_LinkedShaders[i]) + continue; + if (first == MESA_SHADER_STAGES) + first = i; + last = i; } check_explicit_uniform_locations(ctx, prog); @@ -4157,6 +4275,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) /* Validate the inputs of each stage with the output of the preceding * stage. */ + prev = first; for (unsigned i = prev + 1; i <= MESA_SHADER_FRAGMENT; i++) { if (prog->_LinkedShaders[i] == NULL) continue; @@ -4199,6 +4318,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) } } + if (prog->SeparateShader) + disable_varying_optimizations_for_sso(prog); + if (!interstage_cross_validate_uniform_blocks(prog)) goto done; @@ -4250,6 +4372,16 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) } } + prev = first; + for (unsigned i = prev + 1; i <= MESA_SHADER_FRAGMENT; i++) { + if (prog->_LinkedShaders[i] == NULL) + continue; + + match_explicit_outputs_to_inputs(prog, prog->_LinkedShaders[prev], + prog->_LinkedShaders[i]); + prev = i; + } + if (!assign_attribute_or_color_locations(prog, &ctx->Const, MESA_SHADER_VERTEX)) { goto done; @@ -4260,20 +4392,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) goto done; } - unsigned first, last; - - first = MESA_SHADER_STAGES; - last = 0; - - /* Determine first and last stage. */ - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - if (!prog->_LinkedShaders[i]) - continue; - if (first == MESA_SHADER_STAGES) - first = i; - last = i; - } - if (num_tfeedback_decls != 0) { /* From GL_EXT_transform_feedback: * A program will fail to link if: @@ -4333,13 +4451,14 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) do_dead_builtin_varyings(ctx, sh, NULL, num_tfeedback_decls, tfeedback_decls); - if (!prog->SeparateShader) + if (!prog->SeparateShader) { demote_shader_inputs_and_outputs(sh, ir_var_shader_out); - - /* Eliminate code that is now dead due to unused outputs being demoted. - */ - while (do_dead_code(sh->ir, false)) - ; + /* Eliminate code that is now dead due to unused outputs being + * demoted. + */ + while (do_dead_code(sh->ir, false)) + ; + } } else if (first == MESA_SHADER_FRAGMENT) { /* If the program only contains a fragment shader... @@ -4356,11 +4475,14 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) 0 /* num_tfeedback_decls */, NULL /* tfeedback_decls */)) goto done; - } else + } else { demote_shader_inputs_and_outputs(sh, ir_var_shader_in); - - while (do_dead_code(sh->ir, false)) - ; + /* Eliminate code that is now dead due to unused inputs being + * demoted. 
+ */ + while (do_dead_code(sh->ir, false)) + ; + } } next = last; @@ -4485,6 +4607,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) if (ctx->Const.ShaderCompilerOptions[i].LowerBufferInterfaceBlocks) lower_ubo_reference(prog->_LinkedShaders[i]); + if (ctx->Const.ShaderCompilerOptions[i].LowerShaderSharedVariables) + lower_shared_reference(prog->_LinkedShaders[i], + &prog->Comp.SharedSize); + lower_vector_derefs(prog->_LinkedShaders[i]); } diff --git a/src/glsl/list.h b/src/glsl/list.h index 15fcd4abd1c..a1c4d82b017 100644 --- a/src/glsl/list.h +++ b/src/glsl/list.h @@ -688,7 +688,7 @@ inline void exec_node::insert_before(exec_list *before) __node = __next, __next = \ exec_node_data(__type, (__next)->__field.next, __field)) -#define foreach_list_typed_safe_reverse(__type, __node, __field, __list) \ +#define foreach_list_typed_reverse_safe(__type, __node, __field, __list) \ for (__type * __node = \ exec_node_data(__type, (__list)->tail_pred, __field), \ * __prev = \ diff --git a/src/glsl/lower_buffer_access.cpp b/src/glsl/lower_buffer_access.cpp new file mode 100644 index 00000000000..f8c8d140ea8 --- /dev/null +++ b/src/glsl/lower_buffer_access.cpp @@ -0,0 +1,490 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_buffer_access.cpp + * + * Helper for IR lowering pass to replace dereferences of buffer object based + * shader variables with intrinsic function calls. + * + * This helper is used by lowering passes for UBOs, SSBOs and compute shader + * shared variables. + */ + +#include "lower_buffer_access.h" +#include "ir_builder.h" +#include "main/macros.h" +#include "util/list.h" +#include "glsl_parser_extras.h" + +using namespace ir_builder; + +namespace lower_buffer_access { + +static inline int +writemask_for_size(unsigned n) +{ + return ((1 << n) - 1); +} + +/** + * Takes a deref and recursively calls itself to break the deref down to the + * point that the reads or writes generated are contiguous scalars or vectors. 
+ */ +void +lower_buffer_access::emit_access(void *mem_ctx, + bool is_write, + ir_dereference *deref, + ir_variable *base_offset, + unsigned int deref_offset, + bool row_major, + int matrix_columns, + unsigned int packing, + unsigned int write_mask) +{ + if (deref->type->is_record()) { + unsigned int field_offset = 0; + + for (unsigned i = 0; i < deref->type->length; i++) { + const struct glsl_struct_field *field = + &deref->type->fields.structure[i]; + ir_dereference *field_deref = + new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL), + field->name); + + field_offset = + glsl_align(field_offset, + field->type->std140_base_alignment(row_major)); + + emit_access(mem_ctx, is_write, field_deref, base_offset, + deref_offset + field_offset, + row_major, 1, packing, + writemask_for_size(field_deref->type->vector_elements)); + + field_offset += field->type->std140_size(row_major); + } + return; + } + + if (deref->type->is_array()) { + unsigned array_stride = packing == GLSL_INTERFACE_PACKING_STD430 ? + deref->type->fields.array->std430_array_stride(row_major) : + glsl_align(deref->type->fields.array->std140_size(row_major), 16); + + for (unsigned i = 0; i < deref->type->length; i++) { + ir_constant *element = new(mem_ctx) ir_constant(i); + ir_dereference *element_deref = + new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), + element); + emit_access(mem_ctx, is_write, element_deref, base_offset, + deref_offset + i * array_stride, + row_major, 1, packing, + writemask_for_size(element_deref->type->vector_elements)); + } + return; + } + + if (deref->type->is_matrix()) { + for (unsigned i = 0; i < deref->type->matrix_columns; i++) { + ir_constant *col = new(mem_ctx) ir_constant(i); + ir_dereference *col_deref = + new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), col); + + if (row_major) { + /* For a row-major matrix, the next column starts at the next + * element. + */ + int size_mul = deref->type->is_double() ? 8 : 4; + emit_access(mem_ctx, is_write, col_deref, base_offset, + deref_offset + i * size_mul, + row_major, deref->type->matrix_columns, packing, + writemask_for_size(col_deref->type->vector_elements)); + } else { + int size_mul; + + /* std430 doesn't round up vec2 size to a vec4 size */ + if (packing == GLSL_INTERFACE_PACKING_STD430 && + deref->type->vector_elements == 2 && + !deref->type->is_double()) { + size_mul = 8; + } else { + /* std140 always rounds the stride of arrays (and matrices) to a + * vec4, so matrices are always 16 between columns/rows. With + * doubles, they will be 32 apart when there are more than 2 rows. + * + * For both std140 and std430, if the member is a + * three-'component vector with components consuming N basic + * machine units, the base alignment is 4N. For vec4, base + * alignment is 4N. + */ + size_mul = (deref->type->is_double() && + deref->type->vector_elements > 2) ? 32 : 16; + } + + emit_access(mem_ctx, is_write, col_deref, base_offset, + deref_offset + i * size_mul, + row_major, deref->type->matrix_columns, packing, + writemask_for_size(col_deref->type->vector_elements)); + } + } + return; + } + + assert(deref->type->is_scalar() || deref->type->is_vector()); + + if (!row_major) { + ir_rvalue *offset = + add(base_offset, new(mem_ctx) ir_constant(deref_offset)); + unsigned mask = + is_write ? write_mask : (1 << deref->type->vector_elements) - 1; + insert_buffer_access(mem_ctx, deref, deref->type, offset, mask, -1); + } else { + unsigned N = deref->type->is_double() ? 
8 : 4; + + /* We're dereffing a column out of a row-major matrix, so we + * gather the vector from each stored row. + */ + assert(deref->type->base_type == GLSL_TYPE_FLOAT || + deref->type->base_type == GLSL_TYPE_DOUBLE); + /* Matrices, row_major or not, are stored as if they were + * arrays of vectors of the appropriate size in std140. + * Arrays have their strides rounded up to a vec4, so the + * matrix stride is always 16. However a double matrix may either be 16 + * or 32 depending on the number of columns. + */ + assert(matrix_columns <= 4); + unsigned matrix_stride = 0; + /* Matrix stride for std430 mat2xY matrices are not rounded up to + * vec4 size. From OpenGL 4.3 spec, section 7.6.2.2 "Standard Uniform + * Block Layout": + * + * "2. If the member is a two- or four-component vector with components + * consuming N basic machine units, the base alignment is 2N or 4N, + * respectively." [...] + * "4. If the member is an array of scalars or vectors, the base alignment + * and array stride are set to match the base alignment of a single array + * element, according to rules (1), (2), and (3), and rounded up to the + * base alignment of a vec4." [...] + * "7. If the member is a row-major matrix with C columns and R rows, the + * matrix is stored identically to an array of R row vectors with C + * components each, according to rule (4)." [...] + * "When using the std430 storage layout, shader storage blocks will be + * laid out in buffer storage identically to uniform and shader storage + * blocks using the std140 layout, except that the base alignment and + * stride of arrays of scalars and vectors in rule 4 and of structures in + * rule 9 are not rounded up a multiple of the base alignment of a vec4." + */ + if (packing == GLSL_INTERFACE_PACKING_STD430 && matrix_columns == 2) + matrix_stride = 2 * N; + else + matrix_stride = glsl_align(matrix_columns * N, 16); + + const glsl_type *deref_type = deref->type->base_type == GLSL_TYPE_FLOAT ? + glsl_type::float_type : glsl_type::double_type; + + for (unsigned i = 0; i < deref->type->vector_elements; i++) { + ir_rvalue *chan_offset = + add(base_offset, + new(mem_ctx) ir_constant(deref_offset + i * matrix_stride)); + if (!is_write || ((1U << i) & write_mask)) + insert_buffer_access(mem_ctx, deref, deref_type, chan_offset, + (1U << i), i); + } + } +} + +/** + * Determine if a thing being dereferenced is row-major + * + * There is some trickery here. + * + * If the thing being dereferenced is a member of uniform block \b without an + * instance name, then the name of the \c ir_variable is the field name of an + * interface type. If this field is row-major, then the thing referenced is + * row-major. + * + * If the thing being dereferenced is a member of uniform block \b with an + * instance name, then the last dereference in the tree will be an + * \c ir_dereference_record. If that record field is row-major, then the + * thing referenced is row-major. 
+ */ +bool +lower_buffer_access::is_dereferenced_thing_row_major(const ir_rvalue *deref) +{ + bool matrix = false; + const ir_rvalue *ir = deref; + + while (true) { + matrix = matrix || ir->type->without_array()->is_matrix(); + + switch (ir->ir_type) { + case ir_type_dereference_array: { + const ir_dereference_array *const array_deref = + (const ir_dereference_array *) ir; + + ir = array_deref->array; + break; + } + + case ir_type_dereference_record: { + const ir_dereference_record *const record_deref = + (const ir_dereference_record *) ir; + + ir = record_deref->record; + + const int idx = ir->type->field_index(record_deref->field); + assert(idx >= 0); + + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(ir->type->fields.structure[idx].matrix_layout); + + switch (matrix_layout) { + case GLSL_MATRIX_LAYOUT_INHERITED: + break; + case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: + return false; + case GLSL_MATRIX_LAYOUT_ROW_MAJOR: + return matrix || deref->type->without_array()->is_record(); + } + + break; + } + + case ir_type_dereference_variable: { + const ir_dereference_variable *const var_deref = + (const ir_dereference_variable *) ir; + + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(var_deref->var->data.matrix_layout); + + switch (matrix_layout) { + case GLSL_MATRIX_LAYOUT_INHERITED: { + /* For interface block matrix variables we handle inherited + * layouts at HIR generation time, but we don't do that for shared + * variables, which are always column-major + */ + ir_variable *var = deref->variable_referenced(); + assert((var->is_in_buffer_block() && !matrix) || + var->data.mode == ir_var_shader_shared); + return false; + } + case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: + return false; + case GLSL_MATRIX_LAYOUT_ROW_MAJOR: + return matrix || deref->type->without_array()->is_record(); + } + + unreachable("invalid matrix layout"); + break; + } + + default: + return false; + } + } + + /* The tree must have ended with a dereference that wasn't an + * ir_dereference_variable. That is invalid, and it should be impossible. + */ + unreachable("invalid dereference tree"); + return false; +} + +/** + * This function initializes various values that will be used later by + * emit_access when actually emitting loads or stores. + * + * Note: const_offset is an input as well as an output, clients must + * initialize it to the offset of the variable in the underlying block, and + * this function will adjust it by adding the constant offset of the member + * being accessed into that variable. + */ +void +lower_buffer_access::setup_buffer_access(void *mem_ctx, + ir_variable *var, + ir_rvalue *deref, + ir_rvalue **offset, + unsigned *const_offset, + bool *row_major, + int *matrix_columns, + unsigned packing) +{ + *offset = new(mem_ctx) ir_constant(0u); + *row_major = is_dereferenced_thing_row_major(deref); + *matrix_columns = 1; + + /* Calculate the offset to the start of the region of the UBO + * dereferenced by *rvalue. This may be a variable offset if an + * array dereference has a variable index. + */ + while (deref) { + switch (deref->ir_type) { + case ir_type_dereference_variable: { + deref = NULL; + break; + } + + case ir_type_dereference_array: { + ir_dereference_array *deref_array = (ir_dereference_array *) deref; + unsigned array_stride; + if (deref_array->array->type->is_vector()) { + /* We get this when storing or loading a component out of a vector + * with a non-constant index. This happens for v[i] = f where v is + * a vector (or m[i][j] = f where m is a matrix). 
If we don't + * lower that here, it gets turned into v = vector_insert(v, i, + * f), which loads the entire vector, modifies one component and + * then write the entire thing back. That breaks if another + * thread or SIMD channel is modifying the same vector. + */ + array_stride = 4; + if (deref_array->array->type->is_double()) + array_stride *= 2; + } else if (deref_array->array->type->is_matrix() && *row_major) { + /* When loading a vector out of a row major matrix, the + * step between the columns (vectors) is the size of a + * float, while the step between the rows (elements of a + * vector) is handled below in emit_ubo_loads. + */ + array_stride = 4; + if (deref_array->array->type->is_double()) + array_stride *= 2; + *matrix_columns = deref_array->array->type->matrix_columns; + } else if (deref_array->type->without_array()->is_interface()) { + /* We're processing an array dereference of an interface instance + * array. The thing being dereferenced *must* be a variable + * dereference because interfaces cannot be embedded in other + * types. In terms of calculating the offsets for the lowering + * pass, we don't care about the array index. All elements of an + * interface instance array will have the same offsets relative to + * the base of the block that backs them. + */ + deref = deref_array->array->as_dereference(); + break; + } else { + /* Whether or not the field is row-major (because it might be a + * bvec2 or something) does not affect the array itself. We need + * to know whether an array element in its entirety is row-major. + */ + const bool array_row_major = + is_dereferenced_thing_row_major(deref_array); + + /* The array type will give the correct interface packing + * information + */ + if (packing == GLSL_INTERFACE_PACKING_STD430) { + array_stride = deref_array->type->std430_array_stride(array_row_major); + } else { + array_stride = deref_array->type->std140_size(array_row_major); + array_stride = glsl_align(array_stride, 16); + } + } + + ir_rvalue *array_index = deref_array->array_index; + if (array_index->type->base_type == GLSL_TYPE_INT) + array_index = i2u(array_index); + + ir_constant *const_index = + array_index->constant_expression_value(NULL); + if (const_index) { + *const_offset += array_stride * const_index->value.u[0]; + } else { + *offset = add(*offset, + mul(array_index, + new(mem_ctx) ir_constant(array_stride))); + } + deref = deref_array->array->as_dereference(); + break; + } + + case ir_type_dereference_record: { + ir_dereference_record *deref_record = (ir_dereference_record *) deref; + const glsl_type *struct_type = deref_record->record->type; + unsigned intra_struct_offset = 0; + + for (unsigned int i = 0; i < struct_type->length; i++) { + const glsl_type *type = struct_type->fields.structure[i].type; + + ir_dereference_record *field_deref = new(mem_ctx) + ir_dereference_record(deref_record->record, + struct_type->fields.structure[i].name); + const bool field_row_major = + is_dereferenced_thing_row_major(field_deref); + + ralloc_free(field_deref); + + unsigned field_align = 0; + + if (packing == GLSL_INTERFACE_PACKING_STD430) + field_align = type->std430_base_alignment(field_row_major); + else + field_align = type->std140_base_alignment(field_row_major); + + intra_struct_offset = glsl_align(intra_struct_offset, field_align); + + if (strcmp(struct_type->fields.structure[i].name, + deref_record->field) == 0) + break; + + if (packing == GLSL_INTERFACE_PACKING_STD430) + intra_struct_offset += type->std430_size(field_row_major); + else + intra_struct_offset 
+= type->std140_size(field_row_major); + + /* If the field just examined was itself a structure, apply rule + * #9: + * + * "The structure may have padding at the end; the base offset + * of the member following the sub-structure is rounded up to + * the next multiple of the base alignment of the structure." + */ + if (type->without_array()->is_record()) { + intra_struct_offset = glsl_align(intra_struct_offset, + field_align); + + } + } + + *const_offset += intra_struct_offset; + deref = deref_record->record->as_dereference(); + break; + } + + case ir_type_swizzle: { + ir_swizzle *deref_swizzle = (ir_swizzle *) deref; + + assert(deref_swizzle->mask.num_components == 1); + + *const_offset += deref_swizzle->mask.x * sizeof(int); + deref = deref_swizzle->val->as_dereference(); + break; + } + + default: + assert(!"not reached"); + deref = NULL; + break; + } + } +} + +} /* namespace lower_buffer_access */ diff --git a/src/glsl/lower_buffer_access.h b/src/glsl/lower_buffer_access.h new file mode 100644 index 00000000000..cc4614e9792 --- /dev/null +++ b/src/glsl/lower_buffer_access.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_buffer_access.h + * + * Helper for IR lowering pass to replace dereferences of buffer object based + * shader variables with intrinsic function calls. + * + * This helper is used by lowering passes for UBOs, SSBOs and compute shader + * shared variables. 
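The ir_type_dereference_record case above walks the struct's fields in declaration order, aligning the running offset to each field's std140 or std430 base alignment (and, per rule 9, re-aligning after a nested struct) until it reaches the field being dereferenced. A self-contained sketch of that walk, restricted to float scalars and vectors so the std140 alignment and size values can be hard-coded; they stand in for glsl_type::std140_base_alignment and std140_size:

    #include <cstdio>
    #include <string>
    #include <vector>

    /* Simplified field: an N-component float vector (N = 1..4). */
    struct field { std::string name; unsigned components; };

    static unsigned align_to(unsigned v, unsigned a) { return (v + a - 1) / a * a; }
    /* std140: float -> 4, vec2 -> 8, vec3/vec4 -> 16; size is N * 4 bytes. */
    static unsigned base_align(unsigned n) { return n == 1 ? 4 : n == 2 ? 8 : 16; }
    static unsigned size_of(unsigned n)    { return n * 4; }

    static unsigned field_offset(const std::vector<field> &s, const char *wanted)
    {
       unsigned offset = 0;
       for (const field &f : s) {
          offset = align_to(offset, base_align(f.components));
          if (f.name == wanted)
             return offset;              /* stop at the dereferenced field */
          offset += size_of(f.components);
       }
       return offset;
    }

    int main()
    {
       /* struct { float a; vec3 b; vec2 c; } under std140 */
       std::vector<field> s = { {"a", 1}, {"b", 3}, {"c", 2} };
       printf("a at %u, b at %u, c at %u\n",
              field_offset(s, "a"), field_offset(s, "b"), field_offset(s, "c"));
       /* prints: a at 0, b at 16, c at 32 */
       return 0;
    }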
+ */ + +#pragma once +#ifndef LOWER_BUFFER_ACCESS_H +#define LOWER_BUFFER_ACCESS_H + +#include "ir.h" +#include "ir_rvalue_visitor.h" + +namespace lower_buffer_access { + +class lower_buffer_access : public ir_rvalue_enter_visitor { +public: + virtual void + insert_buffer_access(void *mem_ctx, ir_dereference *deref, + const glsl_type *type, ir_rvalue *offset, + unsigned mask, int channel) = 0; + + void emit_access(void *mem_ctx, bool is_write, ir_dereference *deref, + ir_variable *base_offset, unsigned int deref_offset, + bool row_major, int matrix_columns, + unsigned int packing, unsigned int write_mask); + + bool is_dereferenced_thing_row_major(const ir_rvalue *deref); + + void setup_buffer_access(void *mem_ctx, ir_variable *var, ir_rvalue *deref, + ir_rvalue **offset, unsigned *const_offset, + bool *row_major, int *matrix_columns, + unsigned packing); +}; + +} /* namespace lower_buffer_access */ + +#endif /* LOWER_BUFFER_ACCESS_H */ diff --git a/src/glsl/lower_named_interface_blocks.cpp b/src/glsl/lower_named_interface_blocks.cpp index 114bb5811b4..f29eba4f75f 100644 --- a/src/glsl/lower_named_interface_blocks.cpp +++ b/src/glsl/lower_named_interface_blocks.cpp @@ -187,6 +187,7 @@ flatten_named_interface_blocks_declarations::run(exec_list *instructions) new_var->data.sample = iface_t->fields.structure[i].sample; new_var->data.patch = iface_t->fields.structure[i].patch; new_var->data.stream = var->data.stream; + new_var->data.how_declared = var->data.how_declared; new_var->init_interface_type(iface_t); hash_table_insert(interface_namespace, new_var, diff --git a/src/glsl/lower_packed_varyings.cpp b/src/glsl/lower_packed_varyings.cpp index 037c27d88ab..8d1eb1725d5 100644 --- a/src/glsl/lower_packed_varyings.cpp +++ b/src/glsl/lower_packed_varyings.cpp @@ -622,6 +622,7 @@ lower_packed_varyings_visitor::get_packed_varying_deref( packed_var->data.interpolation = unpacked_var->data.interpolation; packed_var->data.location = location; packed_var->data.precision = unpacked_var->data.precision; + packed_var->data.always_active_io = unpacked_var->data.always_active_io; unpacked_var->insert_before(packed_var); this->packed_varyings[slot] = packed_var; } else { diff --git a/src/glsl/lower_shared_reference.cpp b/src/glsl/lower_shared_reference.cpp new file mode 100644 index 00000000000..533cd9202f4 --- /dev/null +++ b/src/glsl/lower_shared_reference.cpp @@ -0,0 +1,496 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
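The lower_buffer_access class declared above factors the traversal logic (emit_access, setup_buffer_access, row-major detection) out of the UBO/SSBO and shared-variable passes; each concrete pass only implements insert_buffer_access to say what one contiguous scalar or vector access becomes. A schematic sketch of that division of labour, using stand-in types rather than Mesa's IR classes:

    #include <cstdio>

    class buffer_access_base {
    public:
       virtual ~buffer_access_base() {}
       /* One scalar/vector access at a byte offset; supplied by each pass. */
       virtual void insert_buffer_access(unsigned offset, unsigned mask) = 0;

       /* Shared traversal: emit one access per enabled component. */
       void emit_access(unsigned base_offset, unsigned components,
                        unsigned stride, unsigned write_mask)
       {
          for (unsigned i = 0; i < components; i++)
             if (write_mask & (1u << i))
                insert_buffer_access(base_offset + i * stride, 1u << i);
       }
    };

    /* A toy "pass" that just prints what it would emit. */
    class print_pass : public buffer_access_base {
    public:
       void insert_buffer_access(unsigned offset, unsigned mask) override
       {
          printf("store mask 0x%x at offset %u\n", mask, offset);
       }
    };

    int main()
    {
       print_pass p;
       /* e.g. writing components x and z of a row-major column, 16 bytes apart */
       p.emit_access(32, 4, 16, 0x5);
       return 0;
    }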
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_shared_reference.cpp + * + * IR lower pass to replace dereferences of compute shader shared variables + * with intrinsic function calls. + * + * This relieves drivers of the responsibility of allocating space for the + * shared variables in the shared memory region. + */ + +#include "lower_buffer_access.h" +#include "ir_builder.h" +#include "main/macros.h" +#include "util/list.h" +#include "glsl_parser_extras.h" + +using namespace ir_builder; + +namespace { + +struct var_offset { + struct list_head node; + const ir_variable *var; + unsigned offset; +}; + +class lower_shared_reference_visitor : + public lower_buffer_access::lower_buffer_access { +public: + + lower_shared_reference_visitor(struct gl_shader *shader) + : list_ctx(ralloc_context(NULL)), shader(shader), shared_size(0u) + { + list_inithead(&var_offsets); + } + + ~lower_shared_reference_visitor() + { + ralloc_free(list_ctx); + } + + enum { + shared_load_access, + shared_store_access, + shared_atomic_access, + } buffer_access_type; + + void insert_buffer_access(void *mem_ctx, ir_dereference *deref, + const glsl_type *type, ir_rvalue *offset, + unsigned mask, int channel); + + void handle_rvalue(ir_rvalue **rvalue); + ir_visitor_status visit_enter(ir_assignment *ir); + void handle_assignment(ir_assignment *ir); + + ir_call *lower_shared_atomic_intrinsic(ir_call *ir); + ir_call *check_for_shared_atomic_intrinsic(ir_call *ir); + ir_visitor_status visit_enter(ir_call *ir); + + unsigned get_shared_offset(const ir_variable *); + + ir_call *shared_load(void *mem_ctx, const struct glsl_type *type, + ir_rvalue *offset); + ir_call *shared_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset, + unsigned write_mask); + + void *list_ctx; + struct gl_shader *shader; + struct list_head var_offsets; + unsigned shared_size; + bool progress; +}; + +unsigned +lower_shared_reference_visitor::get_shared_offset(const ir_variable *var) +{ + list_for_each_entry(var_offset, var_entry, &var_offsets, node) { + if (var_entry->var == var) + return var_entry->offset; + } + + struct var_offset *new_entry = rzalloc(list_ctx, struct var_offset); + list_add(&new_entry->node, &var_offsets); + new_entry->var = var; + + unsigned var_align = var->type->std430_base_alignment(false); + new_entry->offset = glsl_align(shared_size, var_align); + + unsigned var_size = var->type->std430_size(false); + shared_size = new_entry->offset + var_size; + + return new_entry->offset; +} + +void +lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_dereference *deref = (*rvalue)->as_dereference(); + if (!deref) + return; + + ir_variable *var = deref->variable_referenced(); + if (!var || var->data.mode != ir_var_shader_shared) + return; + + buffer_access_type = shared_load_access; + + void *mem_ctx = ralloc_parent(shader->ir); + + ir_rvalue *offset = NULL; + unsigned const_offset = get_shared_offset(var); + bool row_major; + int matrix_columns; + assert(var->get_interface_type() == NULL); + const unsigned packing = GLSL_INTERFACE_PACKING_STD430; + + setup_buffer_access(mem_ctx, var, deref, + &offset, &const_offset, + &row_major, &matrix_columns, packing); + + /* Now that we've calculated the offset to the start of the + * dereference, 
walk over the type and emit loads into a temporary. + */ + const glsl_type *type = (*rvalue)->type; + ir_variable *load_var = new(mem_ctx) ir_variable(type, + "shared_load_temp", + ir_var_temporary); + base_ir->insert_before(load_var); + + ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, + "shared_load_temp_offset", + ir_var_temporary); + base_ir->insert_before(load_offset); + base_ir->insert_before(assign(load_offset, offset)); + + deref = new(mem_ctx) ir_dereference_variable(load_var); + + emit_access(mem_ctx, false, deref, load_offset, const_offset, row_major, + matrix_columns, packing, 0); + + *rvalue = deref; + + progress = true; +} + +void +lower_shared_reference_visitor::handle_assignment(ir_assignment *ir) +{ + if (!ir || !ir->lhs) + return; + + ir_rvalue *rvalue = ir->lhs->as_rvalue(); + if (!rvalue) + return; + + ir_dereference *deref = ir->lhs->as_dereference(); + if (!deref) + return; + + ir_variable *var = ir->lhs->variable_referenced(); + if (!var || var->data.mode != ir_var_shader_shared) + return; + + buffer_access_type = shared_store_access; + + /* We have a write to a shared variable, so declare a temporary and rewrite + * the assignment so that the temporary is the LHS. + */ + void *mem_ctx = ralloc_parent(shader->ir); + + const glsl_type *type = rvalue->type; + ir_variable *store_var = new(mem_ctx) ir_variable(type, + "shared_store_temp", + ir_var_temporary); + base_ir->insert_before(store_var); + ir->lhs = new(mem_ctx) ir_dereference_variable(store_var); + + ir_rvalue *offset = NULL; + unsigned const_offset = get_shared_offset(var); + bool row_major; + int matrix_columns; + assert(var->get_interface_type() == NULL); + const unsigned packing = GLSL_INTERFACE_PACKING_STD430; + + setup_buffer_access(mem_ctx, var, deref, + &offset, &const_offset, + &row_major, &matrix_columns, packing); + + deref = new(mem_ctx) ir_dereference_variable(store_var); + + ir_variable *store_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, + "shared_store_temp_offset", + ir_var_temporary); + base_ir->insert_before(store_offset); + base_ir->insert_before(assign(store_offset, offset)); + + /* Now we have to write the value assigned to the temporary back to memory */ + emit_access(mem_ctx, true, deref, store_offset, const_offset, row_major, + matrix_columns, packing, ir->write_mask); + + progress = true; +} + +ir_visitor_status +lower_shared_reference_visitor::visit_enter(ir_assignment *ir) +{ + handle_assignment(ir); + return rvalue_visit(ir); +} + +void +lower_shared_reference_visitor::insert_buffer_access(void *mem_ctx, + ir_dereference *deref, + const glsl_type *type, + ir_rvalue *offset, + unsigned mask, + int channel) +{ + if (buffer_access_type == shared_store_access) { + ir_call *store = shared_store(mem_ctx, deref, offset, mask); + base_ir->insert_after(store); + } else { + ir_call *load = shared_load(mem_ctx, type, offset); + base_ir->insert_before(load); + ir_rvalue *value = load->return_deref->as_rvalue()->clone(mem_ctx, NULL); + base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), + value)); + } +} + +static bool +compute_shader_enabled(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_COMPUTE; +} + +ir_call * +lower_shared_reference_visitor::shared_store(void *mem_ctx, + ir_rvalue *deref, + ir_rvalue *offset, + unsigned write_mask) +{ + exec_list sig_params; + + ir_variable *offset_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); + sig_params.push_tail(offset_ref); + + ir_variable 
*val_ref = new(mem_ctx) + ir_variable(deref->type, "value" , ir_var_function_in); + sig_params.push_tail(val_ref); + + ir_variable *writemask_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in); + sig_params.push_tail(writemask_ref); + + ir_function_signature *sig = new(mem_ctx) + ir_function_signature(glsl_type::void_type, compute_shader_enabled); + assert(sig); + sig->replace_parameters(&sig_params); + sig->is_intrinsic = true; + + ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_shared"); + f->add_signature(sig); + + exec_list call_params; + call_params.push_tail(offset->clone(mem_ctx, NULL)); + call_params.push_tail(deref->clone(mem_ctx, NULL)); + call_params.push_tail(new(mem_ctx) ir_constant(write_mask)); + return new(mem_ctx) ir_call(sig, NULL, &call_params); +} + +ir_call * +lower_shared_reference_visitor::shared_load(void *mem_ctx, + const struct glsl_type *type, + ir_rvalue *offset) +{ + exec_list sig_params; + + ir_variable *offset_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in); + sig_params.push_tail(offset_ref); + + ir_function_signature *sig = + new(mem_ctx) ir_function_signature(type, compute_shader_enabled); + assert(sig); + sig->replace_parameters(&sig_params); + sig->is_intrinsic = true; + + ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_shared"); + f->add_signature(sig); + + ir_variable *result = new(mem_ctx) + ir_variable(type, "shared_load_result", ir_var_temporary); + base_ir->insert_before(result); + ir_dereference_variable *deref_result = new(mem_ctx) + ir_dereference_variable(result); + + exec_list call_params; + call_params.push_tail(offset->clone(mem_ctx, NULL)); + + return new(mem_ctx) ir_call(sig, deref_result, &call_params); +} + +/* Lowers the intrinsic call to a new internal intrinsic that swaps the access + * to the shared variable in the first parameter by an offset. This involves + * creating the new internal intrinsic (i.e. the new function signature). + */ +ir_call * +lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir) +{ + /* Shared atomics usually have 2 parameters, the shared variable and an + * integer argument. The exception is CompSwap, that has an additional + * integer parameter. 
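get_shared_offset above lazily packs each compute-shader shared variable into the shared memory region: the running region size is aligned to the variable's std430 base alignment, that offset is recorded for the variable, and the region grows by its std430 size. A minimal sketch of that bump allocator, with the alignment and size of each variable passed in directly; the hard-coded values in main are assumptions for the example, not glsl_type queries:

    #include <cstdio>
    #include <map>
    #include <string>

    static unsigned align_to(unsigned v, unsigned a) { return (v + a - 1) / a * a; }

    class shared_allocator {
    public:
       unsigned offset_of(const std::string &name, unsigned align, unsigned size)
       {
          auto it = offsets.find(name);
          if (it != offsets.end())
             return it->second;          /* already placed: reuse its offset */
          unsigned off = align_to(shared_size, align);
          offsets[name] = off;
          shared_size = off + size;      /* region grows past this variable */
          return off;
       }
       unsigned shared_size = 0;         /* total shared memory needed so far */
    private:
       std::map<std::string, unsigned> offsets;
    };

    int main()
    {
       shared_allocator a;
       printf("counter at %u\n", a.offset_of("counter", 4, 4));   /* 0 */
       printf("vecdata at %u\n", a.offset_of("vecdata", 16, 16)); /* 16 */
       printf("counter at %u\n", a.offset_of("counter", 4, 4));   /* 0 (cached) */
       printf("total shared size: %u\n", a.shared_size);          /* 32 */
       return 0;
    }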
+ */ + int param_count = ir->actual_parameters.length(); + assert(param_count == 2 || param_count == 3); + + /* First argument must be a scalar integer shared variable */ + exec_node *param = ir->actual_parameters.get_head(); + ir_instruction *inst = (ir_instruction *) param; + assert(inst->ir_type == ir_type_dereference_variable || + inst->ir_type == ir_type_dereference_array || + inst->ir_type == ir_type_dereference_record || + inst->ir_type == ir_type_swizzle); + + ir_rvalue *deref = (ir_rvalue *) inst; + assert(deref->type->is_scalar() && deref->type->is_integer()); + + ir_variable *var = deref->variable_referenced(); + assert(var); + + /* Compute the offset to the start if the dereference + */ + void *mem_ctx = ralloc_parent(shader->ir); + + ir_rvalue *offset = NULL; + unsigned const_offset = get_shared_offset(var); + bool row_major; + int matrix_columns; + assert(var->get_interface_type() == NULL); + const unsigned packing = GLSL_INTERFACE_PACKING_STD430; + buffer_access_type = shared_atomic_access; + + setup_buffer_access(mem_ctx, var, deref, + &offset, &const_offset, + &row_major, &matrix_columns, packing); + + assert(offset); + assert(!row_major); + assert(matrix_columns == 1); + + ir_rvalue *deref_offset = + add(offset, new(mem_ctx) ir_constant(const_offset)); + + /* Create the new internal function signature that will take an offset + * instead of a shared variable + */ + exec_list sig_params; + ir_variable *sig_param = new(mem_ctx) + ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); + sig_params.push_tail(sig_param); + + const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ? + glsl_type::int_type : glsl_type::uint_type; + sig_param = new(mem_ctx) + ir_variable(type, "data1", ir_var_function_in); + sig_params.push_tail(sig_param); + + if (param_count == 3) { + sig_param = new(mem_ctx) + ir_variable(type, "data2", ir_var_function_in); + sig_params.push_tail(sig_param); + } + + ir_function_signature *sig = + new(mem_ctx) ir_function_signature(deref->type, + compute_shader_enabled); + assert(sig); + sig->replace_parameters(&sig_params); + sig->is_intrinsic = true; + + char func_name[64]; + sprintf(func_name, "%s_shared", ir->callee_name()); + ir_function *f = new(mem_ctx) ir_function(func_name); + f->add_signature(sig); + + /* Now, create the call to the internal intrinsic */ + exec_list call_params; + call_params.push_tail(deref_offset); + param = ir->actual_parameters.get_head()->get_next(); + ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); + call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); + if (param_count == 3) { + param = param->get_next(); + param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); + call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); + } + ir_dereference_variable *return_deref = + ir->return_deref->clone(mem_ctx, NULL); + return new(mem_ctx) ir_call(sig, return_deref, &call_params); +} + +ir_call * +lower_shared_reference_visitor::check_for_shared_atomic_intrinsic(ir_call *ir) +{ + exec_list& params = ir->actual_parameters; + + if (params.length() < 2 || params.length() > 3) + return ir; + + ir_rvalue *rvalue = + ((ir_instruction *) params.get_head())->as_rvalue(); + if (!rvalue) + return ir; + + ir_variable *var = rvalue->variable_referenced(); + if (!var || var->data.mode != ir_var_shader_shared) + return ir; + + const char *callee = ir->callee_name(); + if (!strcmp("__intrinsic_atomic_add", callee) || + !strcmp("__intrinsic_atomic_min", callee) || + !strcmp("__intrinsic_atomic_max", 
callee) || + !strcmp("__intrinsic_atomic_and", callee) || + !strcmp("__intrinsic_atomic_or", callee) || + !strcmp("__intrinsic_atomic_xor", callee) || + !strcmp("__intrinsic_atomic_exchange", callee) || + !strcmp("__intrinsic_atomic_comp_swap", callee)) { + return lower_shared_atomic_intrinsic(ir); + } + + return ir; +} + +ir_visitor_status +lower_shared_reference_visitor::visit_enter(ir_call *ir) +{ + ir_call *new_ir = check_for_shared_atomic_intrinsic(ir); + if (new_ir != ir) { + progress = true; + base_ir->replace_with(new_ir); + return visit_continue_with_parent; + } + + return rvalue_visit(ir); +} + +} /* unnamed namespace */ + +void +lower_shared_reference(struct gl_shader *shader, unsigned *shared_size) +{ + if (shader->Stage != MESA_SHADER_COMPUTE) + return; + + lower_shared_reference_visitor v(shader); + + /* Loop over the instructions lowering references, because we take a deref + * of an shared variable array using a shared variable dereference as the + * index will produce a collection of instructions all of which have cloned + * shared variable dereferences for that array index. + */ + do { + v.progress = false; + visit_list_elements(&v, shader->ir); + } while (v.progress); + + *shared_size = v.shared_size; +} diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp index b74aa3d0630..a172054bac8 100644 --- a/src/glsl/lower_ubo_reference.cpp +++ b/src/glsl/lower_ubo_reference.cpp @@ -33,106 +33,16 @@ * their own. */ -#include "ir.h" +#include "lower_buffer_access.h" #include "ir_builder.h" -#include "ir_rvalue_visitor.h" #include "main/macros.h" #include "glsl_parser_extras.h" using namespace ir_builder; -/** - * Determine if a thing being dereferenced is row-major - * - * There is some trickery here. - * - * If the thing being dereferenced is a member of uniform block \b without an - * instance name, then the name of the \c ir_variable is the field name of an - * interface type. If this field is row-major, then the thing referenced is - * row-major. - * - * If the thing being dereferenced is a member of uniform block \b with an - * instance name, then the last dereference in the tree will be an - * \c ir_dereference_record. If that record field is row-major, then the - * thing referenced is row-major. 
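The lower_shared_reference() entry point above re-runs the visitor until it stops reporting progress, because lowering one reference can leave behind cloned shared-variable dereferences (for example an array index that itself reads shared memory) that still need lowering. The driver is a plain fixed-point loop; a generic sketch of the pattern with a toy visitor and toy IR (stand-ins, not Mesa types):

    #include <cstdio>
    #include <vector>

    /* Toy "IR": a value > 1 still needs lowering; each pass lowers it one
     * step and may expose more work, so we iterate until nothing changes.
     */
    struct toy_visitor {
       bool progress = false;
       void run(std::vector<int> &ir)
       {
          progress = false;
          for (int &v : ir) {
             if (v > 1) {
                v /= 2;            /* "lower" one step */
                progress = true;   /* another pass may be needed */
             }
          }
       }
    };

    int main()
    {
       std::vector<int> ir = {8, 3, 1};
       toy_visitor v;
       do {                         /* same shape as lower_shared_reference() */
          v.run(ir);
       } while (v.progress);
       printf("%d %d %d\n", ir[0], ir[1], ir[2]);   /* 1 1 1 */
       return 0;
    }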
- */ -static bool -is_dereferenced_thing_row_major(const ir_rvalue *deref) -{ - bool matrix = false; - const ir_rvalue *ir = deref; - - while (true) { - matrix = matrix || ir->type->without_array()->is_matrix(); - - switch (ir->ir_type) { - case ir_type_dereference_array: { - const ir_dereference_array *const array_deref = - (const ir_dereference_array *) ir; - - ir = array_deref->array; - break; - } - - case ir_type_dereference_record: { - const ir_dereference_record *const record_deref = - (const ir_dereference_record *) ir; - - ir = record_deref->record; - - const int idx = ir->type->field_index(record_deref->field); - assert(idx >= 0); - - const enum glsl_matrix_layout matrix_layout = - glsl_matrix_layout(ir->type->fields.structure[idx].matrix_layout); - - switch (matrix_layout) { - case GLSL_MATRIX_LAYOUT_INHERITED: - break; - case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: - return false; - case GLSL_MATRIX_LAYOUT_ROW_MAJOR: - return matrix || deref->type->without_array()->is_record(); - } - - break; - } - - case ir_type_dereference_variable: { - const ir_dereference_variable *const var_deref = - (const ir_dereference_variable *) ir; - - const enum glsl_matrix_layout matrix_layout = - glsl_matrix_layout(var_deref->var->data.matrix_layout); - - switch (matrix_layout) { - case GLSL_MATRIX_LAYOUT_INHERITED: - assert(!matrix); - return false; - case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: - return false; - case GLSL_MATRIX_LAYOUT_ROW_MAJOR: - return matrix || deref->type->without_array()->is_record(); - } - - unreachable("invalid matrix layout"); - break; - } - - default: - return false; - } - } - - /* The tree must have ended with a dereference that wasn't an - * ir_dereference_variable. That is invalid, and it should be impossible. - */ - unreachable("invalid dereference tree"); - return false; -} - namespace { -class lower_ubo_reference_visitor : public ir_rvalue_enter_visitor { +class lower_ubo_reference_visitor : + public lower_buffer_access::lower_buffer_access { public: lower_ubo_reference_visitor(struct gl_shader *shader) : shader(shader) @@ -142,30 +52,38 @@ public: void handle_rvalue(ir_rvalue **rvalue); ir_visitor_status visit_enter(ir_assignment *ir); - void setup_for_load_or_store(ir_variable *var, + void setup_for_load_or_store(void *mem_ctx, + ir_variable *var, ir_rvalue *deref, ir_rvalue **offset, unsigned *const_offset, bool *row_major, int *matrix_columns, unsigned packing); - ir_expression *ubo_load(const struct glsl_type *type, + ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type, ir_rvalue *offset); - ir_call *ssbo_load(const struct glsl_type *type, + ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type, ir_rvalue *offset); + bool check_for_buffer_array_copy(ir_assignment *ir); + bool check_for_buffer_struct_copy(ir_assignment *ir); void check_for_ssbo_store(ir_assignment *ir); - void write_to_memory(ir_dereference *deref, - ir_variable *var, - ir_variable *write_var, - unsigned write_mask); - ir_call *ssbo_store(ir_rvalue *deref, ir_rvalue *offset, + void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var, + ir_variable *write_var, unsigned write_mask); + ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset, unsigned write_mask); - void emit_access(bool is_write, ir_dereference *deref, - ir_variable *base_offset, unsigned int deref_offset, - bool row_major, int matrix_columns, - unsigned packing, unsigned write_mask); + enum { + ubo_load_access, + ssbo_load_access, + ssbo_store_access, + 
ssbo_unsized_array_length_access, + ssbo_atomic_access, + } buffer_access_type; + + void insert_buffer_access(void *mem_ctx, ir_dereference *deref, + const glsl_type *type, ir_rvalue *offset, + unsigned mask, int channel); ir_visitor_status visit_enter(class ir_expression *); ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr); @@ -175,7 +93,7 @@ public: ir_expression *process_ssbo_unsized_array_length(ir_rvalue **, ir_dereference *, ir_variable *); - ir_expression *emit_ssbo_get_buffer_size(); + ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx); unsigned calculate_unsized_array_stride(ir_dereference *deref, unsigned packing); @@ -184,12 +102,10 @@ public: ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir); ir_visitor_status visit_enter(ir_call *ir); - void *mem_ctx; struct gl_shader *shader; struct gl_uniform_buffer_variable *ubo_var; ir_rvalue *uniform_block; bool progress; - bool is_shader_storage; }; /** @@ -324,7 +240,8 @@ interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d, } void -lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var, +lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx, + ir_variable *var, ir_rvalue *deref, ir_rvalue **offset, unsigned *const_offset, @@ -339,10 +256,9 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var, deref, &nonconst_block_index); /* Locate the block by interface name */ - this->is_shader_storage = var->is_in_shader_storage_block(); unsigned num_blocks; struct gl_uniform_block **blocks; - if (this->is_shader_storage) { + if (this->buffer_access_type != ubo_load_access) { num_blocks = shader->NumShaderStorageBlocks; blocks = shader->ShaderStorageBlocks; } else { @@ -370,164 +286,10 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var, assert(this->uniform_block); - *offset = new(mem_ctx) ir_constant(0u); - *const_offset = 0; - *row_major = is_dereferenced_thing_row_major(deref); - *matrix_columns = 1; + *const_offset = ubo_var->Offset; - /* Calculate the offset to the start of the region of the UBO - * dereferenced by *rvalue. This may be a variable offset if an - * array dereference has a variable index. - */ - while (deref) { - switch (deref->ir_type) { - case ir_type_dereference_variable: { - *const_offset += ubo_var->Offset; - deref = NULL; - break; - } - - case ir_type_dereference_array: { - ir_dereference_array *deref_array = (ir_dereference_array *) deref; - unsigned array_stride; - if (deref_array->array->type->is_vector()) { - /* We get this when storing or loading a component out of a vector - * with a non-constant index. This happens for v[i] = f where v is - * a vector (or m[i][j] = f where m is a matrix). If we don't - * lower that here, it gets turned into v = vector_insert(v, i, - * f), which loads the entire vector, modifies one component and - * then write the entire thing back. That breaks if another - * thread or SIMD channel is modifying the same vector. - */ - array_stride = 4; - if (deref_array->array->type->is_double()) - array_stride *= 2; - } else if (deref_array->array->type->is_matrix() && *row_major) { - /* When loading a vector out of a row major matrix, the - * step between the columns (vectors) is the size of a - * float, while the step between the rows (elements of a - * vector) is handled below in emit_ubo_loads. 
- */ - array_stride = 4; - if (deref_array->array->type->is_double()) - array_stride *= 2; - *matrix_columns = deref_array->array->type->matrix_columns; - } else if (deref_array->type->without_array()->is_interface()) { - /* We're processing an array dereference of an interface instance - * array. The thing being dereferenced *must* be a variable - * dereference because interfaces cannot be embedded in other - * types. In terms of calculating the offsets for the lowering - * pass, we don't care about the array index. All elements of an - * interface instance array will have the same offsets relative to - * the base of the block that backs them. - */ - deref = deref_array->array->as_dereference(); - break; - } else { - /* Whether or not the field is row-major (because it might be a - * bvec2 or something) does not affect the array itself. We need - * to know whether an array element in its entirety is row-major. - */ - const bool array_row_major = - is_dereferenced_thing_row_major(deref_array); - - /* The array type will give the correct interface packing - * information - */ - if (packing == GLSL_INTERFACE_PACKING_STD430) { - array_stride = deref_array->type->std430_array_stride(array_row_major); - } else { - array_stride = deref_array->type->std140_size(array_row_major); - array_stride = glsl_align(array_stride, 16); - } - } - - ir_rvalue *array_index = deref_array->array_index; - if (array_index->type->base_type == GLSL_TYPE_INT) - array_index = i2u(array_index); - - ir_constant *const_index = - array_index->constant_expression_value(NULL); - if (const_index) { - *const_offset += array_stride * const_index->value.u[0]; - } else { - *offset = add(*offset, - mul(array_index, - new(mem_ctx) ir_constant(array_stride))); - } - deref = deref_array->array->as_dereference(); - break; - } - - case ir_type_dereference_record: { - ir_dereference_record *deref_record = (ir_dereference_record *) deref; - const glsl_type *struct_type = deref_record->record->type; - unsigned intra_struct_offset = 0; - - for (unsigned int i = 0; i < struct_type->length; i++) { - const glsl_type *type = struct_type->fields.structure[i].type; - - ir_dereference_record *field_deref = new(mem_ctx) - ir_dereference_record(deref_record->record, - struct_type->fields.structure[i].name); - const bool field_row_major = - is_dereferenced_thing_row_major(field_deref); - - ralloc_free(field_deref); - - unsigned field_align = 0; - - if (packing == GLSL_INTERFACE_PACKING_STD430) - field_align = type->std430_base_alignment(field_row_major); - else - field_align = type->std140_base_alignment(field_row_major); - - intra_struct_offset = glsl_align(intra_struct_offset, field_align); - - if (strcmp(struct_type->fields.structure[i].name, - deref_record->field) == 0) - break; - - if (packing == GLSL_INTERFACE_PACKING_STD430) - intra_struct_offset += type->std430_size(field_row_major); - else - intra_struct_offset += type->std140_size(field_row_major); - - /* If the field just examined was itself a structure, apply rule - * #9: - * - * "The structure may have padding at the end; the base offset - * of the member following the sub-structure is rounded up to - * the next multiple of the base alignment of the structure." 
- */ - if (type->without_array()->is_record()) { - intra_struct_offset = glsl_align(intra_struct_offset, - field_align); - - } - } - - *const_offset += intra_struct_offset; - deref = deref_record->record->as_dereference(); - break; - } - - case ir_type_swizzle: { - ir_swizzle *deref_swizzle = (ir_swizzle *) deref; - - assert(deref_swizzle->mask.num_components == 1); - - *const_offset += deref_swizzle->mask.x * sizeof(int); - deref = deref_swizzle->val->as_dereference(); - break; - } - - default: - assert(!"not reached"); - deref = NULL; - break; - } - } + setup_buffer_access(mem_ctx, var, deref, offset, const_offset, row_major, + matrix_columns, packing); } void @@ -544,7 +306,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) if (!var || !var->is_in_buffer_block()) return; - mem_ctx = ralloc_parent(shader->ir); + void *mem_ctx = ralloc_parent(shader->ir); ir_rvalue *offset = NULL; unsigned const_offset; @@ -552,10 +314,14 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) int matrix_columns; unsigned packing = var->get_interface_type()->interface_packing; + this->buffer_access_type = + var->is_in_shader_storage_block() ? + ssbo_load_access : ubo_load_access; + /* Compute the offset to the start if the dereference as well as other * information we need to configure the write */ - setup_for_load_or_store(var, deref, + setup_for_load_or_store(mem_ctx, var, deref, &offset, &const_offset, &row_major, &matrix_columns, packing); @@ -577,7 +343,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) base_ir->insert_before(assign(load_offset, offset)); deref = new(mem_ctx) ir_dereference_variable(load_var); - emit_access(false, deref, load_offset, const_offset, + emit_access(mem_ctx, false, deref, load_offset, const_offset, row_major, matrix_columns, packing, 0); *rvalue = deref; @@ -585,7 +351,8 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) } ir_expression * -lower_ubo_reference_visitor::ubo_load(const glsl_type *type, +lower_ubo_reference_visitor::ubo_load(void *mem_ctx, + const glsl_type *type, ir_rvalue *offset) { ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); @@ -604,7 +371,8 @@ shader_storage_buffer_object(const _mesa_glsl_parse_state *state) } ir_call * -lower_ubo_reference_visitor::ssbo_store(ir_rvalue *deref, +lower_ubo_reference_visitor::ssbo_store(void *mem_ctx, + ir_rvalue *deref, ir_rvalue *offset, unsigned write_mask) { @@ -644,7 +412,8 @@ lower_ubo_reference_visitor::ssbo_store(ir_rvalue *deref, } ir_call * -lower_ubo_reference_visitor::ssbo_load(const struct glsl_type *type, +lower_ubo_reference_visitor::ssbo_load(void *mem_ctx, + const struct glsl_type *type, ir_rvalue *offset) { exec_list sig_params; @@ -679,208 +448,46 @@ lower_ubo_reference_visitor::ssbo_load(const struct glsl_type *type, return new(mem_ctx) ir_call(sig, deref_result, &call_params); } -static inline int -writemask_for_size(unsigned n) -{ - return ((1 << n) - 1); -} - -/** - * Takes a deref and recursively calls itself to break the deref down to the - * point that the reads or writes generated are contiguous scalars or vectors. 
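The emit_access helper whose old copy is being deleted here (the logic now lives in lower_buffer_access::emit_access) recursively splits a struct, array, or matrix dereference into per-field, per-element, and per-column accesses until it reaches a contiguous scalar or vector, and only then emits a load or store. A compact sketch of that recursion over a toy type description, using hard-coded std140-style alignments and sizes as stand-ins for Mesa's glsl_type queries:

    #include <cstdio>
    #include <vector>

    /* Toy type: either a vector of N floats, or a struct of sub-types. */
    struct toy_type {
       unsigned vector_elements;        /* non-zero => scalar/vector leaf */
       std::vector<toy_type> fields;    /* non-empty => struct */
    };

    static unsigned align_to(unsigned v, unsigned a) { return (v + a - 1) / a * a; }
    static unsigned std140_align(const toy_type &t)
    { return t.fields.empty() ? (t.vector_elements <= 2 ? t.vector_elements * 4 : 16) : 16; }
    static unsigned std140_size(const toy_type &t);

    /* Recurse until a scalar/vector leaf, then "emit" one access there. */
    static void emit_access(const toy_type &t, unsigned offset)
    {
       if (t.fields.empty()) {
          printf("access %u components at byte offset %u\n",
                 t.vector_elements, offset);
          return;
       }
       unsigned field_offset = 0;
       for (const toy_type &f : t.fields) {
          field_offset = align_to(field_offset, std140_align(f));
          emit_access(f, offset + field_offset);
          field_offset += std140_size(f);
       }
    }

    static unsigned std140_size(const toy_type &t)
    {
       if (t.fields.empty())
          return t.vector_elements * 4;
       unsigned size = 0;
       for (const toy_type &f : t.fields) {
          size = align_to(size, std140_align(f));
          size += std140_size(f);
       }
       return align_to(size, 16);       /* rule 9: pad struct to its alignment */
    }

    int main()
    {
       /* struct { float a; struct { vec3 u; float v; } inner; vec2 c; } */
       toy_type inner = {0, {{3, {}}, {1, {}}}};
       toy_type outer = {0, {{1, {}}, inner, {2, {}}}};
       emit_access(outer, 0);
       return 0;
    }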
- */ void -lower_ubo_reference_visitor::emit_access(bool is_write, - ir_dereference *deref, - ir_variable *base_offset, - unsigned int deref_offset, - bool row_major, - int matrix_columns, - unsigned packing, - unsigned write_mask) +lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx, + ir_dereference *deref, + const glsl_type *type, + ir_rvalue *offset, + unsigned mask, + int channel) { - if (deref->type->is_record()) { - unsigned int field_offset = 0; - - for (unsigned i = 0; i < deref->type->length; i++) { - const struct glsl_struct_field *field = - &deref->type->fields.structure[i]; - ir_dereference *field_deref = - new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL), - field->name); - - field_offset = - glsl_align(field_offset, - field->type->std140_base_alignment(row_major)); - - emit_access(is_write, field_deref, base_offset, - deref_offset + field_offset, - row_major, 1, packing, - writemask_for_size(field_deref->type->vector_elements)); - - field_offset += field->type->std140_size(row_major); - } - return; - } - - if (deref->type->is_array()) { - unsigned array_stride = packing == GLSL_INTERFACE_PACKING_STD430 ? - deref->type->fields.array->std430_array_stride(row_major) : - glsl_align(deref->type->fields.array->std140_size(row_major), 16); - - for (unsigned i = 0; i < deref->type->length; i++) { - ir_constant *element = new(mem_ctx) ir_constant(i); - ir_dereference *element_deref = - new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), - element); - emit_access(is_write, element_deref, base_offset, - deref_offset + i * array_stride, - row_major, 1, packing, - writemask_for_size(element_deref->type->vector_elements)); - } - return; - } - - if (deref->type->is_matrix()) { - for (unsigned i = 0; i < deref->type->matrix_columns; i++) { - ir_constant *col = new(mem_ctx) ir_constant(i); - ir_dereference *col_deref = - new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), col); - - if (row_major) { - /* For a row-major matrix, the next column starts at the next - * element. - */ - int size_mul = deref->type->is_double() ? 8 : 4; - emit_access(is_write, col_deref, base_offset, - deref_offset + i * size_mul, - row_major, deref->type->matrix_columns, packing, - writemask_for_size(col_deref->type->vector_elements)); - } else { - int size_mul; - - /* std430 doesn't round up vec2 size to a vec4 size */ - if (packing == GLSL_INTERFACE_PACKING_STD430 && - deref->type->vector_elements == 2 && - !deref->type->is_double()) { - size_mul = 8; - } else { - /* std140 always rounds the stride of arrays (and matrices) to a - * vec4, so matrices are always 16 between columns/rows. With - * doubles, they will be 32 apart when there are more than 2 rows. - * - * For both std140 and std430, if the member is a - * three-'component vector with components consuming N basic - * machine units, the base alignment is 4N. For vec4, base - * alignment is 4N. - */ - size_mul = (deref->type->is_double() && - deref->type->vector_elements > 2) ? 
32 : 16; - } - - emit_access(is_write, col_deref, base_offset, - deref_offset + i * size_mul, - row_major, deref->type->matrix_columns, packing, - writemask_for_size(col_deref->type->vector_elements)); - } - } - return; + switch (this->buffer_access_type) { + case ubo_load_access: + base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), + ubo_load(mem_ctx, type, offset), + mask)); + break; + case ssbo_load_access: { + ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset); + base_ir->insert_before(load_ssbo); + ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL); + ir_assignment *assignment = + assign(deref->clone(mem_ctx, NULL), value, mask); + base_ir->insert_before(assignment); + break; } - - assert(deref->type->is_scalar() || deref->type->is_vector()); - - if (!row_major) { - ir_rvalue *offset = - add(base_offset, new(mem_ctx) ir_constant(deref_offset)); - if (is_write) - base_ir->insert_after(ssbo_store(deref, offset, write_mask)); - else { - if (!this->is_shader_storage) { - base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), - ubo_load(deref->type, offset))); - } else { - ir_call *load_ssbo = ssbo_load(deref->type, offset); - base_ir->insert_before(load_ssbo); - ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL); - base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), value)); - } - } - } else { - unsigned N = deref->type->is_double() ? 8 : 4; - - /* We're dereffing a column out of a row-major matrix, so we - * gather the vector from each stored row. - */ - assert(deref->type->base_type == GLSL_TYPE_FLOAT || - deref->type->base_type == GLSL_TYPE_DOUBLE); - /* Matrices, row_major or not, are stored as if they were - * arrays of vectors of the appropriate size in std140. - * Arrays have their strides rounded up to a vec4, so the - * matrix stride is always 16. However a double matrix may either be 16 - * or 32 depending on the number of columns. - */ - assert(matrix_columns <= 4); - unsigned matrix_stride = 0; - /* Matrix stride for std430 mat2xY matrices are not rounded up to - * vec4 size. From OpenGL 4.3 spec, section 7.6.2.2 "Standard Uniform - * Block Layout": - * - * "2. If the member is a two- or four-component vector with components - * consuming N basic machine units, the base alignment is 2N or 4N, - * respectively." [...] - * "4. If the member is an array of scalars or vectors, the base alignment - * and array stride are set to match the base alignment of a single array - * element, according to rules (1), (2), and (3), and rounded up to the - * base alignment of a vec4." [...] - * "7. If the member is a row-major matrix with C columns and R rows, the - * matrix is stored identically to an array of R row vectors with C - * components each, according to rule (4)." [...] - * "When using the std430 storage layout, shader storage blocks will be - * laid out in buffer storage identically to uniform and shader storage - * blocks using the std140 layout, except that the base alignment and - * stride of arrays of scalars and vectors in rule 4 and of structures in - * rule 9 are not rounded up a multiple of the base alignment of a vec4." - */ - if (packing == GLSL_INTERFACE_PACKING_STD430 && matrix_columns == 2) - matrix_stride = 2 * N; - else - matrix_stride = glsl_align(matrix_columns * N, 16); - - const glsl_type *deref_type = deref->type->base_type == GLSL_TYPE_FLOAT ? 
- glsl_type::float_type : glsl_type::double_type; - - for (unsigned i = 0; i < deref->type->vector_elements; i++) { - ir_rvalue *chan_offset = - add(base_offset, - new(mem_ctx) ir_constant(deref_offset + i * matrix_stride)); - if (is_write) { - /* If the component is not in the writemask, then don't - * store any value. - */ - if (!((1 << i) & write_mask)) - continue; - - base_ir->insert_after(ssbo_store(swizzle(deref, i, 1), chan_offset, 1)); - } else { - if (!this->is_shader_storage) { - base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), - ubo_load(deref_type, chan_offset), - (1U << i))); - } else { - ir_call *load_ssbo = ssbo_load(deref_type, chan_offset); - base_ir->insert_before(load_ssbo); - ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL); - base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), - value, - (1U << i))); - } - } + case ssbo_store_access: + if (channel >= 0) { + base_ir->insert_after(ssbo_store(mem_ctx, + swizzle(deref, channel, 1), + offset, 1)); + } else { + base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask)); } + break; + default: + unreachable("invalid buffer_access_type in insert_buffer_access"); } } void -lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref, +lower_ubo_reference_visitor::write_to_memory(void *mem_ctx, + ir_dereference *deref, ir_variable *var, ir_variable *write_var, unsigned write_mask) @@ -891,10 +498,12 @@ lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref, int matrix_columns; unsigned packing = var->get_interface_type()->interface_packing; + this->buffer_access_type = ssbo_store_access; + /* Compute the offset to the start if the dereference as well as other * information we need to configure the write */ - setup_for_load_or_store(var, deref, + setup_for_load_or_store(mem_ctx, var, deref, &offset, &const_offset, &row_major, &matrix_columns, packing); @@ -910,7 +519,7 @@ lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref, base_ir->insert_before(assign(write_offset, offset)); deref = new(mem_ctx) ir_dereference_variable(write_var); - emit_access(true, deref, write_offset, const_offset, + emit_access(mem_ctx, true, deref, write_offset, const_offset, row_major, matrix_columns, packing, write_mask); } @@ -985,7 +594,7 @@ lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assig } ir_expression * -lower_ubo_reference_visitor::emit_ssbo_get_buffer_size() +lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx) { ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); return new(mem_ctx) ir_expression(ir_unop_get_buffer_size, @@ -1059,7 +668,7 @@ lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalu ir_dereference *deref, ir_variable *var) { - mem_ctx = ralloc_parent(*rvalue); + void *mem_ctx = ralloc_parent(*rvalue); ir_rvalue *base_offset = NULL; unsigned const_offset; @@ -1068,17 +677,19 @@ lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalu unsigned packing = var->get_interface_type()->interface_packing; int unsized_array_stride = calculate_unsized_array_stride(deref, packing); + this->buffer_access_type = ssbo_unsized_array_length_access; + /* Compute the offset to the start if the dereference as well as other * information we need to calculate the length. 
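process_ssbo_unsized_array_length, which continues just below, computes array.length() for an unsized SSBO array as max((buffer_object_size - offset_of_array) / stride_of_array, 0); the buffer size comes from the ir_unop_get_buffer_size expression emitted by emit_ssbo_get_buffer_size above, while the offset and stride are produced at lowering time. The arithmetic itself is trivial; a standalone sketch with made-up example values:

    #include <algorithm>
    #include <cstdio>

    /* offset and stride are known when the IR is lowered; buffer_size is
     * only known at run time via get_buffer_size.
     */
    static int unsized_array_length(unsigned buffer_size, unsigned offset_of_array,
                                    unsigned stride_of_array)
    {
       int len = ((int)buffer_size - (int)offset_of_array) / (int)stride_of_array;
       return std::max(len, 0);
    }

    int main()
    {
       /* e.g. a vec4 array (stride 16) starting 32 bytes into a 112-byte buffer */
       printf("%d\n", unsized_array_length(112, 32, 16));   /* 5 */
       /* a buffer smaller than the array's start offset clamps to 0 */
       printf("%d\n", unsized_array_length(16, 32, 16));    /* 0 */
       return 0;
    }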
*/ - setup_for_load_or_store(var, deref, + setup_for_load_or_store(mem_ctx, var, deref, &base_offset, &const_offset, &row_major, &matrix_columns, packing); /* array.length() = * max((buffer_object_size - offset_of_array) / stride_of_array, 0) */ - ir_expression *buffer_size = emit_ssbo_get_buffer_size(); + ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx); ir_expression *offset_of_array = new(mem_ctx) ir_expression(ir_binop_add, base_offset, @@ -1112,13 +723,13 @@ lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir) return; ir_variable *var = ir->lhs->variable_referenced(); - if (!var || !var->is_in_buffer_block()) + if (!var || !var->is_in_shader_storage_block()) return; /* We have a write to a buffer variable, so declare a temporary and rewrite * the assignment so that the temporary is the LHS. */ - mem_ctx = ralloc_parent(shader->ir); + void *mem_ctx = ralloc_parent(shader->ir); const glsl_type *type = rvalue->type; ir_variable *write_var = new(mem_ctx) ir_variable(type, @@ -1128,14 +739,131 @@ lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir) ir->lhs = new(mem_ctx) ir_dereference_variable(write_var); /* Now we have to write the value assigned to the temporary back to memory */ - write_to_memory(deref, var, write_var, ir->write_mask); + write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask); progress = true; } +static bool +is_buffer_backed_variable(ir_variable *var) +{ + return var->is_in_buffer_block() || + var->data.mode == ir_var_shader_shared; +} + +bool +lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir) +{ + if (!ir || !ir->lhs || !ir->rhs) + return false; + + /* LHS and RHS must be arrays + * FIXME: arrays of arrays? + */ + if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array()) + return false; + + /* RHS must be a buffer-backed variable. This is what can cause the problem + * since it would lead to a series of loads that need to live until we + * see the writes to the LHS. + */ + ir_variable *rhs_var = ir->rhs->variable_referenced(); + if (!rhs_var || !is_buffer_backed_variable(rhs_var)) + return false; + + /* Split the array copy into individual element copies to reduce + * register pressure + */ + ir_dereference *rhs_deref = ir->rhs->as_dereference(); + if (!rhs_deref) + return false; + + ir_dereference *lhs_deref = ir->lhs->as_dereference(); + if (!lhs_deref) + return false; + + assert(lhs_deref->type->length == rhs_deref->type->length); + void *mem_ctx = ralloc_parent(shader->ir); + + for (unsigned i = 0; i < lhs_deref->type->length; i++) { + ir_dereference *lhs_i = + new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL), + new(mem_ctx) ir_constant(i)); + + ir_dereference *rhs_i = + new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL), + new(mem_ctx) ir_constant(i)); + ir->insert_after(assign(lhs_i, rhs_i)); + } + + ir->remove(); + progress = true; + return true; +} + +bool +lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir) +{ + if (!ir || !ir->lhs || !ir->rhs) + return false; + + /* LHS and RHS must be records */ + if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record()) + return false; + + /* RHS must be a buffer-backed variable. This is what can cause the problem + * since it would lead to a series of loads that need to live until we + * see the writes to the LHS. 
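check_for_buffer_array_copy above (and check_for_buffer_struct_copy, which continues below) rewrites a whole-array or whole-struct assignment whose RHS is buffer-backed into one assignment per element, so the lowered loads for the RHS do not all have to stay live until the writes to the LHS. A sketch of that rewrite on a toy assignment list; the types here are illustrative, not Mesa IR:

    #include <cstdio>
    #include <iterator>
    #include <list>
    #include <string>

    /* Toy "assignment": lhs = rhs; array_len > 0 marks an aggregate copy. */
    struct assignment { std::string lhs, rhs; unsigned array_len; };

    /* Split every aggregate copy into per-element copies, mirroring
     * check_for_buffer_array_copy: insert the element copies after the
     * original assignment, then remove it.
     */
    static void split_aggregate_copies(std::list<assignment> &ir)
    {
       for (auto it = ir.begin(); it != ir.end(); ) {
          if (it->array_len == 0) { ++it; continue; }
          auto insert_pos = std::next(it);
          for (unsigned i = 0; i < it->array_len; i++) {
             assignment elem = { it->lhs + "[" + std::to_string(i) + "]",
                                 it->rhs + "[" + std::to_string(i) + "]", 0 };
             ir.insert(insert_pos, elem);
          }
          it = ir.erase(it);       /* drop the original bulk copy */
       }
    }

    int main()
    {
       std::list<assignment> ir = { {"local_arr", "ssbo_arr", 3}, {"x", "y", 0} };
       split_aggregate_copies(ir);
       for (const assignment &a : ir)
          printf("%s = %s\n", a.lhs.c_str(), a.rhs.c_str());
       return 0;
    }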
+ */ + ir_variable *rhs_var = ir->rhs->variable_referenced(); + if (!rhs_var || !is_buffer_backed_variable(rhs_var)) + return false; + + /* Split the struct copy into individual element copies to reduce + * register pressure + */ + ir_dereference *rhs_deref = ir->rhs->as_dereference(); + if (!rhs_deref) + return false; + + ir_dereference *lhs_deref = ir->lhs->as_dereference(); + if (!lhs_deref) + return false; + + assert(lhs_deref->type->record_compare(rhs_deref->type)); + void *mem_ctx = ralloc_parent(shader->ir); + + for (unsigned i = 0; i < lhs_deref->type->length; i++) { + const char *field_name = lhs_deref->type->fields.structure[i].name; + ir_dereference *lhs_field = + new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL), + field_name); + ir_dereference *rhs_field = + new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL), + field_name); + ir->insert_after(assign(lhs_field, rhs_field)); + } + + ir->remove(); + progress = true; + return true; +} ir_visitor_status lower_ubo_reference_visitor::visit_enter(ir_assignment *ir) { + /* Array and struct copies could involve large amounts of load/store + * operations. To improve register pressure we want to special-case + * these and split them into individual element copies. + * This way we avoid emitting all the loads for the RHS first and + * all the writes for the LHS second and register usage is more + * efficient. + */ + if (check_for_buffer_array_copy(ir)) + return visit_continue_with_parent; + + if (check_for_buffer_struct_copy(ir)) + return visit_continue_with_parent; + check_ssbo_unsized_array_length_assignment(ir); check_for_ssbo_store(ir); return rvalue_visit(ir); @@ -1173,7 +901,7 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir) /* Compute the offset to the start if the dereference and the * block index */ - mem_ctx = ralloc_parent(shader->ir); + void *mem_ctx = ralloc_parent(shader->ir); ir_rvalue *offset = NULL; unsigned const_offset; @@ -1181,7 +909,9 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir) int matrix_columns; unsigned packing = var->get_interface_type()->interface_packing; - setup_for_load_or_store(var, deref, + this->buffer_access_type = ssbo_atomic_access; + + setup_for_load_or_store(mem_ctx, var, deref, &offset, &const_offset, &row_major, &matrix_columns, packing); @@ -1225,7 +955,7 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir) sig->is_intrinsic = true; char func_name[64]; - sprintf(func_name, "%s_internal", ir->callee_name()); + sprintf(func_name, "%s_ssbo", ir->callee_name()); ir_function *f = new(mem_ctx) ir_function(func_name); f->add_signature(sig); @@ -1249,15 +979,29 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir) ir_call * lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir) { + exec_list& params = ir->actual_parameters; + + if (params.length() < 2 || params.length() > 3) + return ir; + + ir_rvalue *rvalue = + ((ir_instruction *) params.get_head())->as_rvalue(); + if (!rvalue) + return ir; + + ir_variable *var = rvalue->variable_referenced(); + if (!var || !var->is_in_shader_storage_block()) + return ir; + const char *callee = ir->callee_name(); - if (!strcmp("__intrinsic_ssbo_atomic_add", callee) || - !strcmp("__intrinsic_ssbo_atomic_min", callee) || - !strcmp("__intrinsic_ssbo_atomic_max", callee) || - !strcmp("__intrinsic_ssbo_atomic_and", callee) || - !strcmp("__intrinsic_ssbo_atomic_or", callee) || - !strcmp("__intrinsic_ssbo_atomic_xor", callee) || - 
!strcmp("__intrinsic_ssbo_atomic_exchange", callee) || - !strcmp("__intrinsic_ssbo_atomic_comp_swap", callee)) { + if (!strcmp("__intrinsic_atomic_add", callee) || + !strcmp("__intrinsic_atomic_min", callee) || + !strcmp("__intrinsic_atomic_max", callee) || + !strcmp("__intrinsic_atomic_and", callee) || + !strcmp("__intrinsic_atomic_or", callee) || + !strcmp("__intrinsic_atomic_xor", callee) || + !strcmp("__intrinsic_atomic_exchange", callee) || + !strcmp("__intrinsic_atomic_comp_swap", callee)) { return lower_ssbo_atomic_intrinsic(ir); } diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp index 1ab3afecc7e..a1ba9345e32 100644 --- a/src/glsl/lower_variable_index_to_cond_assign.cpp +++ b/src/glsl/lower_variable_index_to_cond_assign.cpp @@ -378,6 +378,9 @@ public: case ir_var_shader_storage: return this->lower_uniforms; + case ir_var_shader_shared: + return false; + case ir_var_function_in: case ir_var_const_in: return this->lower_temps; diff --git a/src/glsl/nir/builtin_type_macros.h b/src/glsl/nir/builtin_type_macros.h index 8e16ae45489..7bd2e4e6558 100644 --- a/src/glsl/nir/builtin_type_macros.h +++ b/src/glsl/nir/builtin_type_macros.h @@ -28,8 +28,6 @@ * language version or extension might provide them. */ -#include "glsl_types.h" - DECL_TYPE(error, GL_INVALID_ENUM, GLSL_TYPE_ERROR, 0, 0) DECL_TYPE(void, GL_INVALID_ENUM, GLSL_TYPE_VOID, 0, 0) diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index a26300d1d26..9a25f2fc905 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -691,15 +691,15 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_store_ssbo; } else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) { op = nir_intrinsic_load_ssbo; - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_add_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_ssbo") == 0) { op = nir_intrinsic_ssbo_atomic_add; - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_and_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_ssbo") == 0) { op = nir_intrinsic_ssbo_atomic_and; - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_or_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_ssbo") == 0) { op = nir_intrinsic_ssbo_atomic_or; - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_xor_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_ssbo") == 0) { op = nir_intrinsic_ssbo_atomic_xor; - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_min_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_ssbo") == 0) { assert(ir->return_deref); if (ir->return_deref->type == glsl_type::int_type) op = nir_intrinsic_ssbo_atomic_imin; @@ -707,7 +707,7 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_ssbo_atomic_umin; else unreachable("Invalid type"); - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_max_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_max_ssbo") == 0) { assert(ir->return_deref); if (ir->return_deref->type == glsl_type::int_type) op = nir_intrinsic_ssbo_atomic_imax; @@ -715,9 +715,9 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_ssbo_atomic_umax; else unreachable("Invalid type"); - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_exchange_internal") == 0) { + } else if (strcmp(ir->callee_name(), 
"__intrinsic_atomic_exchange_ssbo") == 0) { op = nir_intrinsic_ssbo_atomic_exchange; - } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_comp_swap_internal") == 0) { + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_ssbo") == 0) { op = nir_intrinsic_ssbo_atomic_comp_swap; } else if (strcmp(ir->callee_name(), "__intrinsic_shader_clock") == 0) { op = nir_intrinsic_shader_clock; @@ -731,6 +731,38 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_memory_barrier_image; } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_shared") == 0) { op = nir_intrinsic_memory_barrier_shared; + } else if (strcmp(ir->callee_name(), "__intrinsic_load_shared") == 0) { + op = nir_intrinsic_load_shared; + } else if (strcmp(ir->callee_name(), "__intrinsic_store_shared") == 0) { + op = nir_intrinsic_store_shared; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_shared") == 0) { + op = nir_intrinsic_shared_atomic_add; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_shared") == 0) { + op = nir_intrinsic_shared_atomic_and; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_shared") == 0) { + op = nir_intrinsic_shared_atomic_or; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_shared") == 0) { + op = nir_intrinsic_shared_atomic_xor; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_shared") == 0) { + assert(ir->return_deref); + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_shared_atomic_imin; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_shared_atomic_umin; + else + unreachable("Invalid type"); + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_max_shared") == 0) { + assert(ir->return_deref); + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_shared_atomic_imax; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_shared_atomic_umax; + else + unreachable("Invalid type"); + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_shared") == 0) { + op = nir_intrinsic_shared_atomic_exchange; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_shared") == 0) { + op = nir_intrinsic_shared_atomic_comp_swap; } else { unreachable("not reached"); } @@ -857,24 +889,12 @@ nir_visitor::visit(ir_call *ir) ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); assert(write_mask); - /* Check if we need the indirect version */ - ir_constant *const_offset = offset->as_constant(); - if (!const_offset) { - op = nir_intrinsic_store_ssbo_indirect; - ralloc_free(instr); - instr = nir_intrinsic_instr_create(shader, op); - instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset)); - instr->const_index[0] = 0; - } else { - instr->const_index[0] = const_offset->value.u[0]; - } - - instr->const_index[1] = write_mask->value.u[0]; - instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val)); + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block)); + instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset)); + instr->const_index[0] = write_mask->value.u[0]; instr->num_components = val->type->vector_elements; - instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block)); nir_builder_instr_insert(&b, &instr->instr); break; } @@ -885,20 +905,8 @@ nir_visitor::visit(ir_call *ir) param = param->get_next(); ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); - /* Check if we need the indirect version */ - ir_constant *const_offset = offset->as_constant(); - if (!const_offset) { - op = 
nir_intrinsic_load_ssbo_indirect; - ralloc_free(instr); - instr = nir_intrinsic_instr_create(shader, op); - instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); - instr->const_index[0] = 0; - dest = &instr->dest; - } else { - instr->const_index[0] = const_offset->value.u[0]; - } - instr->src[0] = nir_src_for_ssa(evaluate_rvalue(block)); + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); const glsl_type *type = ir->return_deref->var->type; instr->num_components = type->vector_elements; @@ -978,6 +986,84 @@ nir_visitor::visit(ir_call *ir) nir_builder_instr_insert(&b, &instr->instr); break; } + case nir_intrinsic_load_shared: { + exec_node *param = ir->actual_parameters.get_head(); + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + instr->const_index[0] = 0; + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset)); + + const glsl_type *type = ir->return_deref->var->type; + instr->num_components = type->vector_elements; + + /* Setup destination register */ + nir_ssa_dest_init(&instr->instr, &instr->dest, + type->vector_elements, NULL); + + nir_builder_instr_insert(&b, &instr->instr); + break; + } + case nir_intrinsic_store_shared: { + exec_node *param = ir->actual_parameters.get_head(); + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); + assert(write_mask); + + instr->const_index[0] = 0; + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); + + instr->const_index[1] = write_mask->value.u[0]; + + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val)); + instr->num_components = val->type->vector_elements; + + nir_builder_instr_insert(&b, &instr->instr); + break; + } + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: { + int param_count = ir->actual_parameters.length(); + assert(param_count == 2 || param_count == 3); + + /* Offset */ + exec_node *param = ir->actual_parameters.get_head(); + ir_instruction *inst = (ir_instruction *) param; + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* data1 parameter (this is always present) */ + param = param->get_next(); + inst = (ir_instruction *) param; + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* data2 parameter (only with atomic_comp_swap) */ + if (param_count == 3) { + assert(op == nir_intrinsic_shared_atomic_comp_swap); + param = param->get_next(); + inst = (ir_instruction *) param; + instr->src[2] = + nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + } + + /* Atomic result */ + assert(ir->return_deref); + nir_ssa_dest_init(&instr->instr, &instr->dest, + ir->return_deref->type->vector_elements, NULL); + nir_builder_instr_insert(&b, &instr->instr); + break; + } default: unreachable("not reached"); } @@ -1178,21 +1264,11 @@ nir_visitor::visit(ir_expression *ir) /* Some special cases */ switch (ir->operation) { case ir_binop_ubo_load: { - ir_constant *const_index = ir->operands[1]->as_constant(); - - nir_intrinsic_op op; - if (const_index) { - op = nir_intrinsic_load_ubo; - } else { - op = 
nir_intrinsic_load_ubo_indirect; - } - - nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op); + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo); load->num_components = ir->type->vector_elements; - load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */ load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0])); - if (!const_index) - load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); + load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); add_instr(&load->instr, ir->type->vector_elements); /* diff --git a/src/glsl/nir/glsl_types.cpp b/src/glsl/nir/glsl_types.cpp index 64b5c0cb106..bc8677ba6fc 100644 --- a/src/glsl/nir/glsl_types.cpp +++ b/src/glsl/nir/glsl_types.cpp @@ -22,7 +22,7 @@ */ #include <stdio.h> -#include "main/core.h" /* for Elements, MAX2 */ +#include "main/macros.h" #include "glsl_parser_extras.h" #include "glsl_types.h" #include "util/hash_table.h" diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 79df6d3df94..94bb76034a2 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -1382,13 +1382,13 @@ static inline bool foreach_if(nir_if *if_stmt, nir_foreach_block_cb cb, bool reverse, void *state) { if (reverse) { - foreach_list_typed_safe_reverse(nir_cf_node, node, node, + foreach_list_typed_reverse_safe(nir_cf_node, node, node, &if_stmt->else_list) { if (!foreach_cf_node(node, cb, reverse, state)) return false; } - foreach_list_typed_safe_reverse(nir_cf_node, node, node, + foreach_list_typed_reverse_safe(nir_cf_node, node, node, &if_stmt->then_list) { if (!foreach_cf_node(node, cb, reverse, state)) return false; @@ -1412,7 +1412,7 @@ static inline bool foreach_loop(nir_loop *loop, nir_foreach_block_cb cb, bool reverse, void *state) { if (reverse) { - foreach_list_typed_safe_reverse(nir_cf_node, node, node, &loop->body) { + foreach_list_typed_reverse_safe(nir_cf_node, node, node, &loop->body) { if (!foreach_cf_node(node, cb, reverse, state)) return false; } @@ -1472,7 +1472,7 @@ nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb, if (!cb(impl->end_block, state)) return false; - foreach_list_typed_safe_reverse(nir_cf_node, node, node, &impl->body) { + foreach_list_typed_reverse_safe(nir_cf_node, node, node, &impl->body) { if (!foreach_cf_node(node, cb, true, state)) return false; } diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index b7374e17407..021c4280557 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1309,8 +1309,8 @@ nir_block_last_instr(nir_block *block) foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list) #define nir_foreach_instr_safe(block, instr) \ foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list) -#define nir_foreach_instr_safe_reverse(block, instr) \ - foreach_list_typed_safe_reverse(nir_instr, instr, node, &(block)->instr_list) +#define nir_foreach_instr_reverse_safe(block, instr) \ + foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list) typedef struct nir_if { nir_cf_node cf_node; @@ -2018,7 +2018,7 @@ void nir_assign_var_locations(struct exec_list *var_list, void nir_lower_io(nir_shader *shader, nir_variable_mode mode, int (*type_size)(const struct glsl_type *)); -nir_src *nir_get_io_indirect_src(nir_intrinsic_instr *instr); +nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr); nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr); void nir_lower_vars_to_ssa(nir_shader *shader); diff --git 
a/src/glsl/nir/nir_constant_expressions.py b/src/glsl/nir/nir_constant_expressions.py index b16ef503c92..32784f6398d 100644 --- a/src/glsl/nir/nir_constant_expressions.py +++ b/src/glsl/nir/nir_constant_expressions.py @@ -32,14 +32,6 @@ template = """\ #include "util/half_float.h" #include "nir_constant_expressions.h" -#if defined(__SUNPRO_CC) -#include <ieeefp.h> -static int isnormal(double x) -{ - return fpclass(x) == FP_NORMAL; -} -#endif - /** * Evaluate one component of packSnorm4x8. */ diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index de30db61eea..5086e297e8e 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -222,6 +222,33 @@ INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0) +/* + * CS shared variable atomic intrinsics + * + * All of the shared variable atomic memory operations read a value from + * memory, compute a new value using one of the operations below, write the + * new value to memory, and return the original value read. + * + * All operations take 2 sources except CompSwap that takes 3. These + * sources represent: + * + * 0: The offset into the shared variable storage region that the atomic + * operation will operate on. + * 1: The data parameter to the atomic function (i.e. the value to add + * in shared_atomic_add, etc). + * 2: For CompSwap only: the second data parameter. + */ +INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, 0) +INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) + #define SYSTEM_VALUE(name, components, num_indices) \ INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \ NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) @@ -247,56 +274,62 @@ SYSTEM_VALUE(num_work_groups, 3, 0) SYSTEM_VALUE(helper_invocation, 1, 0) /* - * The format of the indices depends on the type of the load. For uniforms, - * the first index is the base address and the second index is an offset that - * should be added to the base address. (This way you can determine in the - * back-end which variable is being accessed even in an array.) For inputs, - * the one and only index corresponds to the attribute slot. UBO loads - * have two indices the first of which is the descriptor set and the second - * is the base address to load from. + * Load operations pull data from some piece of GPU memory. All load + * operations operate in terms of offsets into some piece of theoretical + * memory. Loads from externally visible memory (UBO and SSBO) simply take a + * byte offset as a source. Loads from opaque memory (uniforms, inputs, etc.) + * take a base+offset pair where the base (const_index[0]) gives the location + * of the start of the variable being loaded and and the offset source is a + * offset into that variable. * - * UBO loads have a (possibly constant) source which is the UBO buffer index. 
- * For each type of load, the _indirect variant has one additional source - * (the second in the case of UBO's) that is the is an indirect to be added to - * the constant address or base offset to compute the final offset. + * Some load operations such as UBO/SSBO load and per_vertex loads take an + * additional source to specify which UBO/SSBO/vertex to load from. * - * For vector backends, the address is in terms of one vec4, and so each array - * element is +4 scalar components from the previous array element. For scalar - * backends, the address is in terms of a single 4-byte float/int and arrays - * elements begin immediately after the previous array element. + * The exact address type depends on the lowering pass that generates the + * load/store intrinsics. Typically, this is vec4 units for things such as + * varying slots and float units for fragment shader inputs. UBO and SSBO + * offsets are always in bytes. */ -#define LOAD(name, extra_srcs, indices, flags) \ - INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, indices, flags) \ - INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \ - true, 0, 0, indices, flags) +#define LOAD(name, srcs, indices, flags) \ + INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags) -LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(ubo, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(per_vertex_input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) -LOAD(output, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE) -LOAD(per_vertex_output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) -LOAD(push_constant, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { offset }. const_index[] = { base } */ +LOAD(uniform, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { buffer_index, offset }. No const_index */ +LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { offset }. const_index[] = { base } */ +LOAD(input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { vertex, offset }. const_index[] = { base } */ +LOAD(per_vertex_input, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +/* src[] = { buffer_index, offset }. No const_index */ +LOAD(ssbo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE) +/* src[] = { offset }. const_index[] = { base } */ +LOAD(output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) +/* src[] = { vertex, offset }. const_index[] = { base } */ +LOAD(per_vertex_output, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE) +/* src[] = { offset }. const_index[] = { base } */ +LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) +/* src[] = { offset }. const_index[] = { base, size } */ +LOAD(push_constant, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) /* - * Stores work the same way as loads, except now the first register input is - * the value or array to store and the optional second input is the indirect - * offset. SSBO stores are similar, but they accept an extra source for the - * block index and an extra index with the writemask to use. + * Stores work the same way as loads, except now the first source is the value + * to store and the second (and possibly third) source specify where to store + * the value. SSBO and shared memory stores also have a write mask as + * const_index[0]. 
*/ -#define STORE(name, extra_srcs, extra_srcs_size, extra_indices, flags) \ - INTRINSIC(store_##name, 1 + extra_srcs, \ - ARR(0, extra_srcs_size, extra_srcs_size, extra_srcs_size), \ - false, 0, 0, 1 + extra_indices, flags) \ - INTRINSIC(store_##name##_indirect, 2 + extra_srcs, \ - ARR(0, 1, extra_srcs_size, extra_srcs_size), \ - false, 0, 0, 1 + extra_indices, flags) +#define STORE(name, srcs, indices, flags) \ + INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags) -STORE(output, 0, 0, 0, 0) -STORE(per_vertex_output, 1, 1, 0, 0) -STORE(ssbo, 1, 1, 1, 0) +/* src[] = { value, offset }. const_index[] = { base } */ +STORE(output, 2, 1, 0) +/* src[] = { value, vertex, offset }. const_index[] = { base } */ +STORE(per_vertex_output, 3, 1, 0) +/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */ +STORE(ssbo, 3, 1, 0) +/* src[] = { value, offset }. const_index[] = { base, write_mask } */ +STORE(shared, 2, 1, 0) -LAST_INTRINSIC(store_ssbo_indirect) +LAST_INTRINSIC(store_shared) diff --git a/src/glsl/nir/nir_lower_clip.c b/src/glsl/nir/nir_lower_clip.c index c58c7785b3f..e2a2bb689a8 100644 --- a/src/glsl/nir/nir_lower_clip.c +++ b/src/glsl/nir/nir_lower_clip.c @@ -74,6 +74,7 @@ store_clipdist_output(nir_builder *b, nir_variable *out, nir_ssa_def **val) store->const_index[0] = out->data.driver_location; store->src[0].ssa = nir_vec4(b, val[0], val[1], val[2], val[3]); store->src[0].is_ssa = true; + store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_builder_instr_insert(b, &store->instr); } @@ -85,6 +86,7 @@ load_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val) load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input); load->num_components = 4; load->const_index[0] = in->data.driver_location; + load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); nir_builder_instr_insert(b, &load->instr); @@ -112,6 +114,7 @@ find_output_in_block(nir_block *block, void *void_state) intr->const_index[0] == state->drvloc) { assert(state->def == NULL); assert(intr->src[0].is_ssa); + assert(nir_src_as_const_value(intr->src[1])); state->def = intr->src[0].ssa; #if !defined(DEBUG) diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index 5683e69d865..ec6d09d5b6d 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -86,18 +86,11 @@ is_per_vertex_output(struct lower_io_state *state, nir_variable *var) stage == MESA_SHADER_TESS_CTRL; } -static unsigned -get_io_offset(nir_deref_var *deref, nir_instr *instr, +static nir_ssa_def * +get_io_offset(nir_builder *b, nir_deref_var *deref, nir_ssa_def **vertex_index, - nir_ssa_def **out_indirect, - struct lower_io_state *state) + int (*type_size)(const struct glsl_type *)) { - nir_ssa_def *indirect = NULL; - unsigned base_offset = 0; - - nir_builder *b = &state->builder; - b->cursor = nir_before_instr(instr); - nir_deref *tail = &deref->deref; /* For per-vertex input arrays (i.e. 
geometry shader inputs), keep the @@ -115,64 +108,57 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr, *vertex_index = vtx; } + /* Just emit code and let constant-folding go to town */ + nir_ssa_def *offset = nir_imm_int(b, 0); + while (tail->child != NULL) { const struct glsl_type *parent_type = tail->type; tail = tail->child; if (tail->deref_type == nir_deref_type_array) { nir_deref_array *deref_array = nir_deref_as_array(tail); - unsigned size = state->type_size(tail->type); + unsigned size = type_size(tail->type); - base_offset += size * deref_array->base_offset; + offset = nir_iadd(b, offset, + nir_imm_int(b, size * deref_array->base_offset)); if (deref_array->deref_array_type == nir_deref_array_type_indirect) { nir_ssa_def *mul = nir_imul(b, nir_imm_int(b, size), nir_ssa_for_src(b, deref_array->indirect, 1)); - indirect = indirect ? nir_iadd(b, indirect, mul) : mul; + offset = nir_iadd(b, offset, mul); } } else if (tail->deref_type == nir_deref_type_struct) { nir_deref_struct *deref_struct = nir_deref_as_struct(tail); + unsigned field_offset = 0; for (unsigned i = 0; i < deref_struct->index; i++) { - base_offset += - state->type_size(glsl_get_struct_field(parent_type, i)); + field_offset += type_size(glsl_get_struct_field(parent_type, i)); } + offset = nir_iadd(b, offset, nir_imm_int(b, field_offset)); } } - *out_indirect = indirect; - return base_offset; + return offset; } static nir_intrinsic_op load_op(struct lower_io_state *state, - nir_variable_mode mode, bool per_vertex, bool has_indirect) + nir_variable_mode mode, bool per_vertex) { nir_intrinsic_op op; switch (mode) { case nir_var_shader_in: - if (per_vertex) { - op = has_indirect ? nir_intrinsic_load_per_vertex_input_indirect : - nir_intrinsic_load_per_vertex_input; - } else { - op = has_indirect ? nir_intrinsic_load_input_indirect : - nir_intrinsic_load_input; - } + op = per_vertex ? nir_intrinsic_load_per_vertex_input : + nir_intrinsic_load_input; break; case nir_var_shader_out: - if (per_vertex) { - op = has_indirect ? nir_intrinsic_load_per_vertex_output_indirect : - nir_intrinsic_load_per_vertex_output; - } else { - op = has_indirect ? nir_intrinsic_load_output_indirect : - nir_intrinsic_load_output; - } + op = per_vertex ? nir_intrinsic_load_per_vertex_output : + nir_intrinsic_load_output; break; case nir_var_uniform: - op = has_indirect ? nir_intrinsic_load_uniform_indirect : - nir_intrinsic_load_uniform; + op = nir_intrinsic_load_uniform; break; default: unreachable("Unknown variable mode"); @@ -185,6 +171,8 @@ nir_lower_io_block(nir_block *block, void *void_state) { struct lower_io_state *state = void_state; + nir_builder *b = &state->builder; + nir_foreach_instr_safe(block, instr) { if (instr->type != nir_instr_type_intrinsic) continue; @@ -205,38 +193,33 @@ nir_lower_io_block(nir_block *block, void *void_state) mode != nir_var_uniform) continue; + b->cursor = nir_before_instr(instr); + switch (intrin->intrinsic) { case nir_intrinsic_load_var: { bool per_vertex = is_per_vertex_input(state, intrin->variables[0]->var) || is_per_vertex_output(state, intrin->variables[0]->var); - nir_ssa_def *indirect; + nir_ssa_def *offset; nir_ssa_def *vertex_index; - unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr, - per_vertex ? &vertex_index : NULL, - &indirect, state); + offset = get_io_offset(b, intrin->variables[0], + per_vertex ? 
&vertex_index : NULL, + state->type_size); nir_intrinsic_instr *load = nir_intrinsic_instr_create(state->mem_ctx, - load_op(state, mode, per_vertex, - indirect)); + load_op(state, mode, per_vertex)); load->num_components = intrin->num_components; - unsigned location = intrin->variables[0]->var->data.driver_location; - if (mode == nir_var_uniform) { - load->const_index[0] = location; - load->const_index[1] = offset; - } else { - load->const_index[0] = location + offset; - } + load->const_index[0] = + intrin->variables[0]->var->data.driver_location; if (per_vertex) load->src[0] = nir_src_for_ssa(vertex_index); - if (indirect) - load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(indirect); + load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(offset); if (intrin->dest.is_ssa) { nir_ssa_dest_init(&load->instr, &load->dest, @@ -255,38 +238,33 @@ nir_lower_io_block(nir_block *block, void *void_state) case nir_intrinsic_store_var: { assert(mode == nir_var_shader_out); - nir_ssa_def *indirect; + nir_ssa_def *offset; nir_ssa_def *vertex_index; bool per_vertex = is_per_vertex_output(state, intrin->variables[0]->var); - unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr, - per_vertex ? &vertex_index : NULL, - &indirect, state); - offset += intrin->variables[0]->var->data.driver_location; + offset = get_io_offset(b, intrin->variables[0], + per_vertex ? &vertex_index : NULL, + state->type_size); - nir_intrinsic_op store_op; - if (per_vertex) { - store_op = indirect ? nir_intrinsic_store_per_vertex_output_indirect - : nir_intrinsic_store_per_vertex_output; - } else { - store_op = indirect ? nir_intrinsic_store_output_indirect - : nir_intrinsic_store_output; - } + nir_intrinsic_op store_op = + per_vertex ? nir_intrinsic_store_per_vertex_output : + nir_intrinsic_store_output; nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx, store_op); store->num_components = intrin->num_components; - store->const_index[0] = offset; nir_src_copy(&store->src[0], &intrin->src[0], store); + store->const_index[0] = + intrin->variables[0]->var->data.driver_location; + if (per_vertex) store->src[1] = nir_src_for_ssa(vertex_index); - if (indirect) - store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(indirect); + store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(offset); nir_instr_insert_before(&intrin->instr, &store->instr); nir_instr_remove(&intrin->instr); @@ -330,21 +308,24 @@ nir_lower_io(nir_shader *shader, nir_variable_mode mode, } /** - * Return the indirect source for a load/store indirect intrinsic. + * Return the offset soruce for a load/store intrinsic. 
*/ nir_src * -nir_get_io_indirect_src(nir_intrinsic_instr *instr) +nir_get_io_offset_src(nir_intrinsic_instr *instr) { switch (instr->intrinsic) { - case nir_intrinsic_load_input_indirect: - case nir_intrinsic_load_output_indirect: - case nir_intrinsic_load_uniform_indirect: + case nir_intrinsic_load_input: + case nir_intrinsic_load_output: + case nir_intrinsic_load_uniform: return &instr->src[0]; - case nir_intrinsic_load_per_vertex_input_indirect: - case nir_intrinsic_load_per_vertex_output_indirect: - case nir_intrinsic_store_output_indirect: + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ssbo: + case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_load_per_vertex_output: + case nir_intrinsic_store_output: return &instr->src[1]; - case nir_intrinsic_store_per_vertex_output_indirect: + case nir_intrinsic_store_ssbo: + case nir_intrinsic_store_per_vertex_output: return &instr->src[2]; default: return NULL; @@ -360,11 +341,8 @@ nir_get_io_vertex_index_src(nir_intrinsic_instr *instr) switch (instr->intrinsic) { case nir_intrinsic_load_per_vertex_input: case nir_intrinsic_load_per_vertex_output: - case nir_intrinsic_load_per_vertex_input_indirect: - case nir_intrinsic_load_per_vertex_output_indirect: return &instr->src[0]; case nir_intrinsic_store_per_vertex_output: - case nir_intrinsic_store_per_vertex_output_indirect: return &instr->src[1]; default: return NULL; diff --git a/src/glsl/nir/nir_lower_phis_to_scalar.c b/src/glsl/nir/nir_lower_phis_to_scalar.c index aa124d9e6cc..2f5927f6406 100644 --- a/src/glsl/nir/nir_lower_phis_to_scalar.c +++ b/src/glsl/nir/nir_lower_phis_to_scalar.c @@ -91,13 +91,9 @@ is_phi_src_scalarizable(nir_phi_src *src, case nir_intrinsic_interp_var_at_sample: case nir_intrinsic_interp_var_at_offset: case nir_intrinsic_load_uniform: - case nir_intrinsic_load_uniform_indirect: case nir_intrinsic_load_ubo: - case nir_intrinsic_load_ubo_indirect: case nir_intrinsic_load_ssbo: - case nir_intrinsic_load_ssbo_indirect: case nir_intrinsic_load_input: - case nir_intrinsic_load_input_indirect: return true; default: break; diff --git a/src/glsl/nir/nir_lower_samplers.c b/src/glsl/nir/nir_lower_samplers.c index 19deafab37a..858088237e3 100644 --- a/src/glsl/nir/nir_lower_samplers.c +++ b/src/glsl/nir/nir_lower_samplers.c @@ -25,7 +25,6 @@ #include "nir.h" #include "nir_builder.h" -#include "../program.h" #include "program/hash_table.h" #include "ir_uniform.h" diff --git a/src/glsl/nir/nir_lower_two_sided_color.c b/src/glsl/nir/nir_lower_two_sided_color.c index 6995b9d6bc1..7df12e070f1 100644 --- a/src/glsl/nir/nir_lower_two_sided_color.c +++ b/src/glsl/nir/nir_lower_two_sided_color.c @@ -73,6 +73,7 @@ load_input(nir_builder *b, nir_variable *in) load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input); load->num_components = 4; load->const_index[0] = in->data.driver_location; + load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); nir_builder_instr_insert(b, &load->instr); @@ -151,6 +152,7 @@ nir_lower_two_sided_color_block(nir_block *block, void *void_state) unsigned drvloc = state->colors[idx].front->data.driver_location; if (intr->const_index[0] == drvloc) { + assert(nir_src_as_const_value(intr->src[0])); break; } } diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index 30ede52b146..3843f21c0ee 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -185,8 +185,10 @@ optimizations = [ (('fsqrt', a), ('frcp', ('frsq', a)), 
'options->lower_fsqrt'), (('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'), # Boolean simplifications - (('ine', 'a@bool', 0), 'a'), - (('ieq', 'a@bool', 0), ('inot', 'a')), + (('ieq', 'a@bool', True), a), + (('ine', 'a@bool', True), ('inot', a)), + (('ine', 'a@bool', False), a), + (('ieq', 'a@bool', False), ('inot', 'a')), (('bcsel', a, True, False), ('ine', a, 0)), (('bcsel', a, False, True), ('ieq', a, 0)), (('bcsel', True, b, c), b), diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c index 76bfc47c2a0..10f46cef1de 100644 --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@ -439,21 +439,15 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) switch (instr->intrinsic) { case nir_intrinsic_load_uniform: - case nir_intrinsic_load_uniform_indirect: var_list = &state->shader->uniforms; break; case nir_intrinsic_load_input: - case nir_intrinsic_load_input_indirect: case nir_intrinsic_load_per_vertex_input: - case nir_intrinsic_load_per_vertex_input_indirect: var_list = &state->shader->inputs; break; case nir_intrinsic_load_output: - case nir_intrinsic_load_output_indirect: case nir_intrinsic_store_output: - case nir_intrinsic_store_output_indirect: case nir_intrinsic_store_per_vertex_output: - case nir_intrinsic_store_per_vertex_output_indirect: var_list = &state->shader->outputs; break; default: diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index d014f3cd811..68edea09309 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -1112,8 +1112,7 @@ nir_vulkan_resource_index(nir_builder *b, unsigned set, unsigned binding, static struct vtn_ssa_value * _vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, unsigned set, unsigned binding, nir_variable_mode mode, - nir_ssa_def *index, unsigned offset, nir_ssa_def *indirect, - struct vtn_type *type) + nir_ssa_def *index, nir_ssa_def *offset, struct vtn_type *type) { struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); val->type = type->type; @@ -1121,26 +1120,20 @@ _vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, if (glsl_type_is_vector_or_scalar(type->type)) { nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); load->num_components = glsl_get_vector_elements(type->type); - load->const_index[0] = offset; switch (op) { - case nir_intrinsic_load_ubo_indirect: - case nir_intrinsic_load_ssbo_indirect: - load->src[1] = nir_src_for_ssa(indirect); - /* fall through */ case nir_intrinsic_load_ubo: case nir_intrinsic_load_ssbo: { nir_ssa_def *res_index = nir_vulkan_resource_index(&b->nb, set, binding, mode, index); load->src[0] = nir_src_for_ssa(res_index); + load->src[1] = nir_src_for_ssa(offset); break; } case nir_intrinsic_load_push_constant: - break; /* Nothing to do */ - case nir_intrinsic_load_push_constant_indirect: - load->src[0] = nir_src_for_ssa(indirect); + load->src[0] = nir_src_for_ssa(offset); break; default: @@ -1155,15 +1148,17 @@ _vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); if (glsl_type_is_struct(type->type)) { for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *child_offset = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); val->elems[i] = _vtn_block_load(b, op, set, binding, mode, index, - offset + type->offsets[i], - indirect, type->members[i]); + child_offset, type->members[i]); } } else { for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *child_offset = + nir_iadd(&b->nb, offset, 
nir_imm_int(&b->nb, i * type->stride)); val->elems[i] = _vtn_block_load(b, op, set, binding, mode, index, - offset + i * type->stride, - indirect, type->array_element); + child_offset,type->array_element); } } } @@ -1174,8 +1169,7 @@ _vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, static void vtn_block_get_offset(struct vtn_builder *b, nir_deref_var *src, struct vtn_type **type, nir_deref *src_tail, - nir_ssa_def **index, - unsigned *offset, nir_ssa_def **indirect) + nir_ssa_def **index, nir_ssa_def **offset) { nir_deref *deref = &src->deref; @@ -1191,27 +1185,30 @@ vtn_block_get_offset(struct vtn_builder *b, nir_deref_var *src, *index = nir_imm_int(&b->nb, 0); } - *offset = 0; - *indirect = NULL; + *offset = nir_imm_int(&b->nb, 0); while (deref != src_tail) { deref = deref->child; switch (deref->deref_type) { case nir_deref_type_array: { nir_deref_array *deref_array = nir_deref_as_array(deref); - if (deref_array->deref_array_type == nir_deref_array_type_direct) { - *offset += (*type)->stride * deref_array->base_offset; - } else { - nir_ssa_def *off = nir_imul(&b->nb, deref_array->indirect.ssa, - nir_imm_int(&b->nb, (*type)->stride)); - *indirect = *indirect ? nir_iadd(&b->nb, *indirect, off) : off; - } + nir_ssa_def *off = nir_imm_int(&b->nb, deref_array->base_offset); + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) + off = nir_iadd(&b->nb, off, deref_array->indirect.ssa); + + off = nir_imul(&b->nb, off, nir_imm_int(&b->nb, (*type)->stride)); + *offset = nir_iadd(&b->nb, *offset, off); + *type = (*type)->array_element; break; } case nir_deref_type_struct: { nir_deref_struct *deref_struct = nir_deref_as_struct(deref); - *offset += (*type)->offsets[deref_struct->index]; + + unsigned elem_off = (*type)->offsets[deref_struct->index]; + *offset = nir_iadd(&b->nb, *offset, nir_imm_int(&b->nb, elem_off)); + *type = (*type)->members[deref_struct->index]; break; } @@ -1227,9 +1224,8 @@ vtn_block_load(struct vtn_builder *b, nir_deref_var *src, struct vtn_type *type, nir_deref *src_tail) { nir_ssa_def *index; - unsigned offset; - nir_ssa_def *indirect; - vtn_block_get_offset(b, src, &type, src_tail, &index, &offset, &indirect); + nir_ssa_def *offset; + vtn_block_get_offset(b, src, &type, src_tail, &index, &offset); nir_intrinsic_op op; if (src->var->data.mode == nir_var_uniform) { @@ -1237,25 +1233,22 @@ vtn_block_load(struct vtn_builder *b, nir_deref_var *src, /* UBO load */ assert(src->var->data.binding >= 0); - op = indirect ? nir_intrinsic_load_ubo_indirect - : nir_intrinsic_load_ubo; + op = nir_intrinsic_load_ubo; } else { /* Push constant load */ assert(src->var->data.descriptor_set == -1 && src->var->data.binding == -1); - op = indirect ? nir_intrinsic_load_push_constant_indirect - : nir_intrinsic_load_push_constant; + op = nir_intrinsic_load_push_constant; } } else { assert(src->var->data.mode == nir_var_shader_storage); - op = indirect ? 
nir_intrinsic_load_ssbo_indirect - : nir_intrinsic_load_ssbo; + op = nir_intrinsic_load_ssbo; } return _vtn_block_load(b, op, src->var->data.descriptor_set, src->var->data.binding, src->var->data.mode, - index, offset, indirect, type); + index, offset, type); } /* @@ -1319,14 +1312,13 @@ vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, static void _vtn_block_store(struct vtn_builder *b, nir_intrinsic_op op, struct vtn_ssa_value *src, unsigned set, unsigned binding, - nir_variable_mode mode, nir_ssa_def *index, unsigned offset, - nir_ssa_def *indirect, struct vtn_type *type) + nir_variable_mode mode, nir_ssa_def *index, + nir_ssa_def *offset, struct vtn_type *type) { assert(src->type == type->type); if (glsl_type_is_vector_or_scalar(type->type)) { nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op); store->num_components = glsl_get_vector_elements(type->type); - store->const_index[0] = offset; store->const_index[1] = (1 << store->num_components) - 1; store->src[0] = nir_src_for_ssa(src->def); @@ -1334,24 +1326,24 @@ _vtn_block_store(struct vtn_builder *b, nir_intrinsic_op op, set, binding, mode, index); store->src[1] = nir_src_for_ssa(res_index); - - if (op == nir_intrinsic_store_ssbo_indirect) - store->src[2] = nir_src_for_ssa(indirect); + store->src[2] = nir_src_for_ssa(offset); nir_builder_instr_insert(&b->nb, &store->instr); } else { unsigned elems = glsl_get_length(type->type); if (glsl_type_is_struct(type->type)) { for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *child_offset = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i])); _vtn_block_store(b, op, src->elems[i], set, binding, mode, - index, offset + type->offsets[i], indirect, - type->members[i]); + index, child_offset, type->members[i]); } } else { for (unsigned i = 0; i < elems; i++) { + nir_ssa_def *child_offset = + nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride)); _vtn_block_store(b, op, src->elems[i], set, binding, mode, - index, offset + i * type->stride, indirect, - type->array_element); + index, child_offset, type->array_element); } } } @@ -1363,16 +1355,14 @@ vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, nir_deref *dest_tail) { nir_ssa_def *index; - unsigned offset; - nir_ssa_def *indirect; - vtn_block_get_offset(b, dest, &type, dest_tail, &index, &offset, &indirect); + nir_ssa_def *offset; + vtn_block_get_offset(b, dest, &type, dest_tail, &index, &offset); - nir_intrinsic_op op = indirect ? 
nir_intrinsic_store_ssbo_indirect - : nir_intrinsic_store_ssbo; + nir_intrinsic_op op = nir_intrinsic_store_ssbo; return _vtn_block_store(b, op, src, dest->var->data.descriptor_set, dest->var->data.binding, dest->var->data.mode, - index, offset, indirect, type); + index, offset, type); } static nir_ssa_def * vtn_vector_insert(struct vtn_builder *b, @@ -1545,7 +1535,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, /* We have exactly one push constant block */ assert(b->shader->num_uniforms == 0); - b->shader->num_uniforms = vtn_type_block_size(type); + b->shader->num_uniforms = vtn_type_block_size(type) * 4; break; case SpvStorageClassInput: var->data.mode = nir_var_shader_in; diff --git a/src/glsl/opt_array_splitting.cpp b/src/glsl/opt_array_splitting.cpp index 9e73f3c44bb..89ce76bed2b 100644 --- a/src/glsl/opt_array_splitting.cpp +++ b/src/glsl/opt_array_splitting.cpp @@ -188,6 +188,10 @@ ir_array_reference_visitor::visit_enter(ir_dereference_array *ir) if (entry && !ir->array_index->as_constant()) entry->split = false; + /* If the index is also array dereference, visit index. */ + if (ir->array_index->as_dereference_array()) + visit_enter(ir->array_index->as_dereference_array()); + return visit_continue_with_parent; } diff --git a/src/glsl/opt_constant_propagation.cpp b/src/glsl/opt_constant_propagation.cpp index 184aaa1c297..fb24a4fad04 100644 --- a/src/glsl/opt_constant_propagation.cpp +++ b/src/glsl/opt_constant_propagation.cpp @@ -500,7 +500,8 @@ ir_constant_propagation_visitor::add_constant(ir_assignment *ir) * the variable value isn't modified between this assignment and the next * instruction where its value is read. */ - if (deref->var->data.mode == ir_var_shader_storage) + if (deref->var->data.mode == ir_var_shader_storage || + deref->var->data.mode == ir_var_shader_shared) return; entry = new(this->mem_ctx) acp_entry(deref->var, ir->write_mask, constant); diff --git a/src/glsl/opt_constant_variable.cpp b/src/glsl/opt_constant_variable.cpp index cdfbc340243..56f6a819e1e 100644 --- a/src/glsl/opt_constant_variable.cpp +++ b/src/glsl/opt_constant_variable.cpp @@ -120,7 +120,8 @@ ir_constant_variable_visitor::visit_enter(ir_assignment *ir) * and we can't be sure that this variable won't be written by another * thread. 
*/ - if (var->data.mode == ir_var_shader_storage) + if (var->data.mode == ir_var_shader_storage || + var->data.mode == ir_var_shader_shared) return visit_continue; constval = ir->rhs->constant_expression_value(); diff --git a/src/glsl/opt_copy_propagation.cpp b/src/glsl/opt_copy_propagation.cpp index f20699563fd..5d4cb4fe613 100644 --- a/src/glsl/opt_copy_propagation.cpp +++ b/src/glsl/opt_copy_propagation.cpp @@ -330,7 +330,8 @@ ir_copy_propagation_visitor::add_copy(ir_assignment *ir) */ ir->condition = new(ralloc_parent(ir)) ir_constant(false); this->progress = true; - } else if (lhs_var->data.mode != ir_var_shader_storage) { + } else if (lhs_var->data.mode != ir_var_shader_storage && + lhs_var->data.mode != ir_var_shader_shared) { entry = new(this->acp) acp_entry(lhs_var, rhs_var); this->acp->push_tail(entry); } diff --git a/src/glsl/opt_dead_builtin_varyings.cpp b/src/glsl/opt_dead_builtin_varyings.cpp index 68b70eedf92..53871130e12 100644 --- a/src/glsl/opt_dead_builtin_varyings.cpp +++ b/src/glsl/opt_dead_builtin_varyings.cpp @@ -85,7 +85,7 @@ public: { ir_variable *var = ir->variable_referenced(); - if (!var || var->data.mode != this->mode) + if (!var || var->data.mode != this->mode || !var->type->is_array()) return visit_continue; if (this->find_frag_outputs && var->data.location == FRAG_RESULT_DATA0) { diff --git a/src/glsl/opt_dead_code.cpp b/src/glsl/opt_dead_code.cpp index c5be166e75a..c2ce0b94ece 100644 --- a/src/glsl/opt_dead_code.cpp +++ b/src/glsl/opt_dead_code.cpp @@ -75,6 +75,20 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned) || !entry->declaration) continue; + /* Section 7.4.1 (Shader Interface Matching) of the OpenGL 4.5 + * (Core Profile) spec says: + * + * "With separable program objects, interfaces between shader + * stages may involve the outputs from one program object and the + * inputs from a second program object. For such interfaces, it is + * not possible to detect mismatches at link time, because the + * programs are linked separately. When each such program is + * linked, all inputs or outputs interfacing with another program + * stage are treated as active." + */ + if (entry->var->data.always_active_io) + continue; + if (!entry->assign_list.is_empty()) { /* Remove all the dead assignments to the variable we found. * Don't do so if it's a shader or function output, though. diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp index 7d59c787aed..84266b0cb58 100644 --- a/src/glsl/standalone_scaffolding.cpp +++ b/src/glsl/standalone_scaffolding.cpp @@ -69,7 +69,7 @@ _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, void _mesa_shader_debug(struct gl_context *, GLenum, GLuint *, - const char *, int) + const char *) { } diff --git a/src/glsl/standalone_scaffolding.h b/src/glsl/standalone_scaffolding.h index a9ca5e4e3d3..f853a187bf4 100644 --- a/src/glsl/standalone_scaffolding.h +++ b/src/glsl/standalone_scaffolding.h @@ -52,7 +52,7 @@ _mesa_clear_shader_program_data(struct gl_shader_program *); extern "C" void _mesa_shader_debug(struct gl_context *ctx, GLenum type, GLuint *id, - const char *msg, int len); + const char *msg); static inline gl_shader_stage _mesa_shader_enum_to_shader_stage(GLenum v) |
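Illustrative sketch (not part of the diff above): the central NIR change in this merge is that load/store intrinsics no longer come in direct and _indirect variants — the offset is always an ordinary source, and a constant offset is just an immediate SSA value that later constant folding can exploit. Per the new nir_intrinsics.h comments, a UBO load takes src[] = { buffer_index, offset } with no const_index. The fragment below uses only builder calls that appear in this patch (nir_intrinsic_instr_create, nir_src_for_ssa, nir_imm_int, nir_ssa_dest_init, nir_builder_instr_insert); the helper name, the vec4 width and the 16-byte offset are invented for the example, and the code is meant to compile inside the Mesa tree rather than stand alone.

#include "nir.h"
#include "nir_builder.h"

/* Hypothetical helper: emit a vec4 UBO load at a constant byte offset.
 * src[0] = buffer index, src[1] = byte offset; no const_index is used. */
static nir_ssa_def *
emit_example_ubo_load(nir_builder *b, nir_ssa_def *buffer_index)
{
   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
   load->num_components = 4;
   load->src[0] = nir_src_for_ssa(buffer_index);        /* which UBO to read */
   load->src[1] = nir_src_for_ssa(nir_imm_int(b, 16));  /* byte offset as a plain source */
   nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
   nir_builder_instr_insert(b, &load->instr);
   return &load->dest.ssa;
}

Whether the offset ends up constant or computed, the same intrinsic is emitted; the backend no longer has to handle two shapes of the instruction.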
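A second sketch, same caveats and the same two headers as above: the new compute-shader shared-variable atomics follow the layout documented in the nir_intrinsics.h hunk — src[0] is the offset into the shared-variable storage region, src[1] is the data operand (comp_swap adds a src[2]), and the single-component destination receives the value that was in memory before the operation. The helper name and parameters are invented for illustration.

/* Hypothetical helper: atomically add 'data' to the shared-variable slot at
 * 'offset' and return the previous contents, mirroring how glsl_to_nir.cpp
 * fills in nir_intrinsic_shared_atomic_add in this patch. */
static nir_ssa_def *
emit_example_shared_atomic_add(nir_builder *b, nir_ssa_def *offset,
                               nir_ssa_def *data)
{
   nir_intrinsic_instr *atomic =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_shared_atomic_add);
   atomic->src[0] = nir_src_for_ssa(offset);  /* offset into shared storage */
   atomic->src[1] = nir_src_for_ssa(data);    /* value to add */
   nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL);  /* old value */
   nir_builder_instr_insert(b, &atomic->instr);
   return &atomic->dest.ssa;
}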