Diffstat (limited to 'src/glsl')
-rw-r--r--  src/glsl/Makefile.sources | 3
-rw-r--r--  src/glsl/ast.h | 10
-rw-r--r--  src/glsl/ast_function.cpp | 24
-rw-r--r--  src/glsl/ast_to_hir.cpp | 14
-rw-r--r--  src/glsl/ast_type.cpp | 6
-rw-r--r--  src/glsl/builtin_functions.cpp | 275
-rw-r--r--  src/glsl/glsl_parser.yy | 20
-rw-r--r--  src/glsl/glsl_parser_extras.cpp | 7
-rw-r--r--  src/glsl/glsl_parser_extras.h | 7
-rw-r--r--  src/glsl/hir_field_selection.cpp | 3
-rw-r--r--  src/glsl/ir.cpp | 1
-rw-r--r--  src/glsl/ir.h | 7
-rw-r--r--  src/glsl/ir_constant_expression.cpp | 8
-rw-r--r--  src/glsl/ir_optimization.h | 1
-rw-r--r--  src/glsl/ir_reader.cpp | 4
-rw-r--r--  src/glsl/link_varyings.cpp | 44
-rw-r--r--  src/glsl/linker.cpp | 206
-rw-r--r--  src/glsl/list.h | 2
-rw-r--r--  src/glsl/lower_buffer_access.cpp | 490
-rw-r--r--  src/glsl/lower_buffer_access.h | 65
-rw-r--r--  src/glsl/lower_named_interface_blocks.cpp | 1
-rw-r--r--  src/glsl/lower_packed_varyings.cpp | 1
-rw-r--r--  src/glsl/lower_shared_reference.cpp | 496
-rw-r--r--  src/glsl/lower_ubo_reference.cpp | 720
-rw-r--r--  src/glsl/lower_variable_index_to_cond_assign.cpp | 3
-rw-r--r--  src/glsl/nir/builtin_type_macros.h | 2
-rw-r--r--  src/glsl/nir/glsl_to_nir.cpp | 174
-rw-r--r--  src/glsl/nir/glsl_types.cpp | 2
-rw-r--r--  src/glsl/nir/nir.c | 8
-rw-r--r--  src/glsl/nir/nir.h | 6
-rw-r--r--  src/glsl/nir/nir_constant_expressions.py | 8
-rw-r--r--  src/glsl/nir/nir_intrinsics.h | 117
-rw-r--r--  src/glsl/nir/nir_lower_clip.c | 3
-rw-r--r--  src/glsl/nir/nir_lower_io.c | 132
-rw-r--r--  src/glsl/nir/nir_lower_phis_to_scalar.c | 4
-rw-r--r--  src/glsl/nir/nir_lower_samplers.c | 1
-rw-r--r--  src/glsl/nir/nir_lower_two_sided_color.c | 2
-rw-r--r--  src/glsl/nir/nir_opt_algebraic.py | 6
-rw-r--r--  src/glsl/nir/nir_print.c | 6
-rw-r--r--  src/glsl/nir/spirv_to_nir.c | 96
-rw-r--r--  src/glsl/opt_array_splitting.cpp | 4
-rw-r--r--  src/glsl/opt_constant_propagation.cpp | 3
-rw-r--r--  src/glsl/opt_constant_variable.cpp | 3
-rw-r--r--  src/glsl/opt_copy_propagation.cpp | 3
-rw-r--r--  src/glsl/opt_dead_builtin_varyings.cpp | 2
-rw-r--r--  src/glsl/opt_dead_code.cpp | 14
-rw-r--r--  src/glsl/standalone_scaffolding.cpp | 2
-rw-r--r--  src/glsl/standalone_scaffolding.h | 2
48 files changed, 2037 insertions, 981 deletions
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 0c9fd75d206..e64c31e17c6 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -160,6 +160,8 @@ LIBGLSL_FILES = \
loop_analysis.h \
loop_controls.cpp \
loop_unroll.cpp \
+ lower_buffer_access.cpp \
+ lower_buffer_access.h \
lower_clip_distance.cpp \
lower_const_arrays_to_uniforms.cpp \
lower_discard.cpp \
@@ -184,6 +186,7 @@ LIBGLSL_FILES = \
lower_vector_insert.cpp \
lower_vertex_id.cpp \
lower_output_reads.cpp \
+ lower_shared_reference.cpp \
lower_ubo_reference.cpp \
opt_algebraic.cpp \
opt_array_splitting.cpp \
diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index 3bea63ea0ed..adfc7938bff 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -699,16 +699,16 @@ struct ast_type_qualifier {
bool merge_qualifier(YYLTYPE *loc,
_mesa_glsl_parse_state *state,
- ast_type_qualifier q);
+ const ast_type_qualifier &q);
bool merge_out_qualifier(YYLTYPE *loc,
_mesa_glsl_parse_state *state,
- ast_type_qualifier q,
+ const ast_type_qualifier &q,
ast_node* &node);
bool merge_in_qualifier(YYLTYPE *loc,
_mesa_glsl_parse_state *state,
- ast_type_qualifier q,
+ const ast_type_qualifier &q,
ast_node* &node);
ast_subroutine_list *subroutine_list;
@@ -1152,7 +1152,7 @@ class ast_cs_input_layout : public ast_node
{
public:
ast_cs_input_layout(const struct YYLTYPE &locp,
- ast_layout_expression **local_size)
+ ast_layout_expression *const *local_size)
{
for (int i = 0; i < 3; i++) {
this->local_size[i] = local_size[i];
@@ -1197,6 +1197,6 @@ check_builtin_array_max_size(const char *name, unsigned size,
extern void _mesa_ast_process_interface_block(YYLTYPE *locp,
_mesa_glsl_parse_state *state,
ast_interface_block *const block,
- const struct ast_type_qualifier q);
+ const struct ast_type_qualifier &q);
#endif /* AST_H */
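
[Editor's note] The ast.h hunks above switch the qualifier parameters from pass-by-value to pass-by-const-reference. A minimal standalone sketch of the difference, using an invented toy_qualifier rather than Mesa's real struct layout:

    #include <cstdio>

    /* Toy stand-in for ast_type_qualifier: a wide flag struct that is cheap
     * to reference but wasteful to copy on every merge_qualifier() call. */
    struct toy_qualifier {
       unsigned long long flags[4];
    };

    /* Old style: the whole struct is copied for each call. */
    static bool merge_by_value(toy_qualifier q) { return q.flags[0] != 0; }

    /* New style: no copy, and const prevents the callee from mutating the
     * caller's qualifier. */
    static bool merge_by_ref(const toy_qualifier &q) { return q.flags[0] != 0; }

    int main()
    {
       toy_qualifier q = {{1, 0, 0, 0}};
       printf("%d %d\n", merge_by_value(q), merge_by_ref(q));
       return 0;
    }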
diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
index 466ece67424..e32a588f091 100644
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -143,19 +143,21 @@ verify_image_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state,
}
static bool
-verify_first_atomic_ssbo_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state,
+verify_first_atomic_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state,
ir_variable *var)
{
- if (!var || !var->is_in_shader_storage_block()) {
+ if (!var ||
+ (!var->is_in_shader_storage_block() &&
+ var->data.mode != ir_var_shader_shared)) {
_mesa_glsl_error(loc, state, "First argument to atomic function "
- "must be a buffer variable");
+ "must be a buffer or shared variable");
return false;
}
return true;
}
static bool
-is_atomic_ssbo_function(const char *func_name)
+is_atomic_function(const char *func_name)
{
return !strcmp(func_name, "atomicAdd") ||
!strcmp(func_name, "atomicMin") ||
@@ -276,16 +278,16 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
/* The first parameter of atomic functions must be a buffer variable */
const char *func_name = sig->function_name();
- bool is_atomic_ssbo = is_atomic_ssbo_function(func_name);
- if (is_atomic_ssbo) {
+ bool is_atomic = is_atomic_function(func_name);
+ if (is_atomic) {
const ir_rvalue *const actual = (ir_rvalue *) actual_ir_parameters.head;
const ast_expression *const actual_ast =
exec_node_data(ast_expression, actual_ast_parameters.head, link);
YYLTYPE loc = actual_ast->get_location();
- if (!verify_first_atomic_ssbo_parameter(&loc, state,
- actual->variable_referenced())) {
+ if (!verify_first_atomic_parameter(&loc, state,
+ actual->variable_referenced())) {
return false;
}
}
@@ -1737,7 +1739,7 @@ ast_function_expression::handle_method(exec_list *instructions,
result = new(ctx) ir_constant(op->type->array_size());
}
} else if (op->type->is_vector()) {
- if (state->ARB_shading_language_420pack_enable) {
+ if (state->has_420pack()) {
/* .length() returns int. */
result = new(ctx) ir_constant((int) op->type->vector_elements);
} else {
@@ -1746,7 +1748,7 @@ ast_function_expression::handle_method(exec_list *instructions,
goto fail;
}
} else if (op->type->is_matrix()) {
- if (state->ARB_shading_language_420pack_enable) {
+ if (state->has_420pack()) {
/* .length() returns int. */
result = new(ctx) ir_constant((int) op->type->matrix_columns);
} else {
@@ -2075,7 +2077,7 @@ ast_aggregate_initializer::hir(exec_list *instructions,
}
const glsl_type *const constructor_type = this->constructor_type;
- if (!state->ARB_shading_language_420pack_enable) {
+ if (!state->has_420pack()) {
_mesa_glsl_error(&loc, state, "C-style initialization requires the "
"GL_ARB_shading_language_420pack extension");
return ir_rvalue::error_value(ctx);
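
[Editor's note] The relaxed first-argument check above accepts both SSBO-backed variables and compute-shader shared variables. A condensed, self-contained sketch of the predicate, with invented stand-in types (not Mesa's ir_variable):

    #include <cstdio>

    enum toy_mode { toy_var_auto, toy_var_shader_shared };

    /* Stand-ins for ir_variable state; field names invented for the sketch. */
    struct toy_var {
       bool in_ssbo;        /* models is_in_shader_storage_block() */
       toy_mode mode;       /* models ir_variable::data.mode */
    };

    /* Mirrors the relaxed verify_first_atomic_parameter() condition: the
     * first atomic argument must live in an SSBO or be a compute-shader
     * shared variable. */
    static bool first_atomic_arg_ok(const toy_var *var)
    {
       return var && (var->in_ssbo || var->mode == toy_var_shader_shared);
    }

    int main()
    {
       toy_var buffer_var = { true,  toy_var_auto };
       toy_var shared_var = { false, toy_var_shader_shared };
       toy_var local_var  = { false, toy_var_auto };
       printf("%d %d %d\n", first_atomic_arg_ok(&buffer_var),
              first_atomic_arg_ok(&shared_var), first_atomic_arg_ok(&local_var));
       return 0;
    }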
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 52881a4da7a..fc6bb3e31f1 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -1825,7 +1825,7 @@ ast_expression::do_hir(exec_list *instructions,
* tree. This particular use must be at location specified in the grammar
* as 'variable_identifier'.
*/
- ir_variable *var =
+ ir_variable *var =
state->symbols->get_variable(this->primary_expression.identifier);
if (var != NULL) {
@@ -2650,7 +2650,9 @@ apply_explicit_binding(struct _mesa_glsl_parse_state *state,
return;
}
- } else if (state->is_version(420, 310) && base_type->is_image()) {
+ } else if ((state->is_version(420, 310) ||
+ state->ARB_shading_language_420pack_enable) &&
+ base_type->is_image()) {
assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS);
if (max_index >= ctx->Const.MaxImageUnits) {
_mesa_glsl_error(loc, state, "Image binding %d exceeds the "
@@ -3737,7 +3739,7 @@ process_initializer(ir_variable *var, ast_declaration *decl,
* expressions. Const-qualified global variables must still be
* initialized with constant expressions.
*/
- if (!state->ARB_shading_language_420pack_enable
+ if (!state->has_420pack()
|| state->current_function == NULL) {
_mesa_glsl_error(& initializer_loc, state,
"initializer of %s variable `%s' must be a "
@@ -5366,7 +5368,7 @@ ast_jump_statement::hir(exec_list *instructions,
if (state->current_function->return_type != ret_type) {
YYLTYPE loc = this->get_location();
- if (state->ARB_shading_language_420pack_enable) {
+ if (state->has_420pack()) {
if (!apply_implicit_conversion(state->current_function->return_type,
ret, state)) {
_mesa_glsl_error(& loc, state,
@@ -5558,8 +5560,8 @@ ast_switch_statement::hir(exec_list *instructions,
/* From page 66 (page 55 of the PDF) of the GLSL 1.50 spec:
*
- * "The type of init-expression in a switch statement must be a
- * scalar integer."
+ * "The type of init-expression in a switch statement must be a
+ * scalar integer."
*/
if (!test_expression->type->is_scalar() ||
!test_expression->type->is_integer()) {
diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
index 03ed4dcfa2a..8643b7bfb76 100644
--- a/src/glsl/ast_type.cpp
+++ b/src/glsl/ast_type.cpp
@@ -116,7 +116,7 @@ ast_type_qualifier::interpolation_string() const
bool
ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
_mesa_glsl_parse_state *state,
- ast_type_qualifier q)
+ const ast_type_qualifier &q)
{
ast_type_qualifier ubo_mat_mask;
ubo_mat_mask.flags.i = 0;
@@ -293,7 +293,7 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
bool
ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc,
_mesa_glsl_parse_state *state,
- ast_type_qualifier q,
+ const ast_type_qualifier &q,
ast_node* &node)
{
void *mem_ctx = state;
@@ -309,7 +309,7 @@ ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc,
bool
ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc,
_mesa_glsl_parse_state *state,
- ast_type_qualifier q,
+ const ast_type_qualifier &q,
ast_node* &node)
{
void *mem_ctx = state;
diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 881ee2b6b55..9973a763087 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -479,6 +479,12 @@ compute_shader(const _mesa_glsl_parse_state *state)
}
static bool
+buffer_atomics_supported(const _mesa_glsl_parse_state *state)
+{
+ return compute_shader(state) || shader_storage_buffer_object(state);
+}
+
+static bool
barrier_supported(const _mesa_glsl_parse_state *state)
{
return compute_shader(state) ||
@@ -606,8 +612,8 @@ private:
ir_expression_operation opcode,
const glsl_type *return_type,
const glsl_type *param_type);
- ir_function_signature *binop(ir_expression_operation opcode,
- builtin_available_predicate avail,
+ ir_function_signature *binop(builtin_available_predicate avail,
+ ir_expression_operation opcode,
const glsl_type *return_type,
const glsl_type *param0_type,
const glsl_type *param1_type);
@@ -774,16 +780,16 @@ private:
ir_function_signature *_atomic_counter_op(const char *intrinsic,
builtin_available_predicate avail);
- ir_function_signature *_atomic_ssbo_intrinsic2(builtin_available_predicate avail,
- const glsl_type *type);
- ir_function_signature *_atomic_ssbo_op2(const char *intrinsic,
- builtin_available_predicate avail,
- const glsl_type *type);
- ir_function_signature *_atomic_ssbo_intrinsic3(builtin_available_predicate avail,
- const glsl_type *type);
- ir_function_signature *_atomic_ssbo_op3(const char *intrinsic,
- builtin_available_predicate avail,
- const glsl_type *type);
+ ir_function_signature *_atomic_intrinsic2(builtin_available_predicate avail,
+ const glsl_type *type);
+ ir_function_signature *_atomic_op2(const char *intrinsic,
+ builtin_available_predicate avail,
+ const glsl_type *type);
+ ir_function_signature *_atomic_intrinsic3(builtin_available_predicate avail,
+ const glsl_type *type);
+ ir_function_signature *_atomic_op3(const char *intrinsic,
+ builtin_available_predicate avail,
+ const glsl_type *type);
B1(min3)
B1(max3)
@@ -930,53 +936,53 @@ builtin_builder::create_intrinsics()
_atomic_counter_intrinsic(shader_atomic_counters),
NULL);
- add_function("__intrinsic_ssbo_atomic_add",
- _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
- glsl_type::uint_type),
- _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
- glsl_type::int_type),
- NULL);
- add_function("__intrinsic_ssbo_atomic_min",
- _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
- glsl_type::uint_type),
- _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
- glsl_type::int_type),
- NULL);
- add_function("__intrinsic_ssbo_atomic_max",
- _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
- glsl_type::uint_type),
- _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
- glsl_type::int_type),
- NULL);
- add_function("__intrinsic_ssbo_atomic_and",
- _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
- glsl_type::uint_type),
- _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
- glsl_type::int_type),
- NULL);
- add_function("__intrinsic_ssbo_atomic_or",
- _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
- glsl_type::uint_type),
- _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
- glsl_type::int_type),
- NULL);
- add_function("__intrinsic_ssbo_atomic_xor",
- _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
- glsl_type::uint_type),
- _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
- glsl_type::int_type),
- NULL);
- add_function("__intrinsic_ssbo_atomic_exchange",
- _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
- glsl_type::uint_type),
- _atomic_ssbo_intrinsic2(shader_storage_buffer_object,
- glsl_type::int_type),
- NULL);
- add_function("__intrinsic_ssbo_atomic_comp_swap",
- _atomic_ssbo_intrinsic3(shader_storage_buffer_object,
- glsl_type::uint_type),
- _atomic_ssbo_intrinsic3(shader_storage_buffer_object,
- glsl_type::int_type),
+ add_function("__intrinsic_atomic_add",
+ _atomic_intrinsic2(buffer_atomics_supported,
+ glsl_type::uint_type),
+ _atomic_intrinsic2(buffer_atomics_supported,
+ glsl_type::int_type),
+ NULL);
+ add_function("__intrinsic_atomic_min",
+ _atomic_intrinsic2(buffer_atomics_supported,
+ glsl_type::uint_type),
+ _atomic_intrinsic2(buffer_atomics_supported,
+ glsl_type::int_type),
+ NULL);
+ add_function("__intrinsic_atomic_max",
+ _atomic_intrinsic2(buffer_atomics_supported,
+ glsl_type::uint_type),
+ _atomic_intrinsic2(buffer_atomics_supported,
+ glsl_type::int_type),
+ NULL);
+ add_function("__intrinsic_atomic_and",
+ _atomic_intrinsic2(buffer_atomics_supported,
+ glsl_type::uint_type),
+ _atomic_intrinsic2(buffer_atomics_supported,
+ glsl_type::int_type),
+ NULL);
+ add_function("__intrinsic_atomic_or",
+ _atomic_intrinsic2(buffer_atomics_supported,
+ glsl_type::uint_type),
+ _atomic_intrinsic2(buffer_atomics_supported,
+ glsl_type::int_type),
+ NULL);
+ add_function("__intrinsic_atomic_xor",
+ _atomic_intrinsic2(buffer_atomics_supported,
+ glsl_type::uint_type),
+ _atomic_intrinsic2(buffer_atomics_supported,
+ glsl_type::int_type),
+ NULL);
+ add_function("__intrinsic_atomic_exchange",
+ _atomic_intrinsic2(buffer_atomics_supported,
+ glsl_type::uint_type),
+ _atomic_intrinsic2(buffer_atomics_supported,
+ glsl_type::int_type),
+ NULL);
+ add_function("__intrinsic_atomic_comp_swap",
+ _atomic_intrinsic3(buffer_atomics_supported,
+ glsl_type::uint_type),
+ _atomic_intrinsic3(buffer_atomics_supported,
+ glsl_type::int_type),
NULL);
add_image_functions(false);
@@ -1336,7 +1342,7 @@ builtin_builder::create_builtins()
_smoothstep(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type),
_smoothstep(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type),
NULL);
-
+
FD130(isnan)
FD130(isinf)
@@ -1373,7 +1379,7 @@ builtin_builder::create_builtins()
FD(distance)
FD(dot)
- add_function("cross", _cross(always_available, glsl_type::vec3_type),
+ add_function("cross", _cross(always_available, glsl_type::vec3_type),
_cross(fp64, glsl_type::dvec3_type), NULL);
FD(normalize)
@@ -2682,68 +2688,68 @@ builtin_builder::create_builtins()
NULL);
add_function("atomicAdd",
- _atomic_ssbo_op2("__intrinsic_ssbo_atomic_add",
- shader_storage_buffer_object,
- glsl_type::uint_type),
- _atomic_ssbo_op2("__intrinsic_ssbo_atomic_add",
- shader_storage_buffer_object,
- glsl_type::int_type),
+ _atomic_op2("__intrinsic_atomic_add",
+ buffer_atomics_supported,
+ glsl_type::uint_type),
+ _atomic_op2("__intrinsic_atomic_add",
+ buffer_atomics_supported,
+ glsl_type::int_type),
NULL);
add_function("atomicMin",
- _atomic_ssbo_op2("__intrinsic_ssbo_atomic_min",
- shader_storage_buffer_object,
- glsl_type::uint_type),
- _atomic_ssbo_op2("__intrinsic_ssbo_atomic_min",
- shader_storage_buffer_object,
- glsl_type::int_type),
+ _atomic_op2("__intrinsic_atomic_min",
+ buffer_atomics_supported,
+ glsl_type::uint_type),
+ _atomic_op2("__intrinsic_atomic_min",
+ buffer_atomics_supported,
+ glsl_type::int_type),
NULL);
add_function("atomicMax",
- _atomic_ssbo_op2("__intrinsic_ssbo_atomic_max",
- shader_storage_buffer_object,
- glsl_type::uint_type),
- _atomic_ssbo_op2("__intrinsic_ssbo_atomic_max",
- shader_storage_buffer_object,
- glsl_type::int_type),
+ _atomic_op2("__intrinsic_atomic_max",
+ buffer_atomics_supported,
+ glsl_type::uint_type),
+ _atomic_op2("__intrinsic_atomic_max",
+ buffer_atomics_supported,
+ glsl_type::int_type),
NULL);
add_function("atomicAnd",
- _atomic_ssbo_op2("__intrinsic_ssbo_atomic_and",
- shader_storage_buffer_object,
- glsl_type::uint_type),
- _atomic_ssbo_op2("__intrinsic_ssbo_atomic_and",
- shader_storage_buffer_object,
- glsl_type::int_type),
+ _atomic_op2("__intrinsic_atomic_and",
+ buffer_atomics_supported,
+ glsl_type::uint_type),
+ _atomic_op2("__intrinsic_atomic_and",
+ buffer_atomics_supported,
+ glsl_type::int_type),
NULL);
add_function("atomicOr",
- _atomic_ssbo_op2("__intrinsic_ssbo_atomic_or",
- shader_storage_buffer_object,
- glsl_type::uint_type),
- _atomic_ssbo_op2("__intrinsic_ssbo_atomic_or",
- shader_storage_buffer_object,
- glsl_type::int_type),
+ _atomic_op2("__intrinsic_atomic_or",
+ buffer_atomics_supported,
+ glsl_type::uint_type),
+ _atomic_op2("__intrinsic_atomic_or",
+ buffer_atomics_supported,
+ glsl_type::int_type),
NULL);
add_function("atomicXor",
- _atomic_ssbo_op2("__intrinsic_ssbo_atomic_xor",
- shader_storage_buffer_object,
- glsl_type::uint_type),
- _atomic_ssbo_op2("__intrinsic_ssbo_atomic_xor",
- shader_storage_buffer_object,
- glsl_type::int_type),
+ _atomic_op2("__intrinsic_atomic_xor",
+ buffer_atomics_supported,
+ glsl_type::uint_type),
+ _atomic_op2("__intrinsic_atomic_xor",
+ buffer_atomics_supported,
+ glsl_type::int_type),
NULL);
add_function("atomicExchange",
- _atomic_ssbo_op2("__intrinsic_ssbo_atomic_exchange",
- shader_storage_buffer_object,
- glsl_type::uint_type),
- _atomic_ssbo_op2("__intrinsic_ssbo_atomic_exchange",
- shader_storage_buffer_object,
- glsl_type::int_type),
+ _atomic_op2("__intrinsic_atomic_exchange",
+ buffer_atomics_supported,
+ glsl_type::uint_type),
+ _atomic_op2("__intrinsic_atomic_exchange",
+ buffer_atomics_supported,
+ glsl_type::int_type),
NULL);
add_function("atomicCompSwap",
- _atomic_ssbo_op3("__intrinsic_ssbo_atomic_comp_swap",
- shader_storage_buffer_object,
- glsl_type::uint_type),
- _atomic_ssbo_op3("__intrinsic_ssbo_atomic_comp_swap",
- shader_storage_buffer_object,
- glsl_type::int_type),
+ _atomic_op3("__intrinsic_atomic_comp_swap",
+ buffer_atomics_supported,
+ glsl_type::uint_type),
+ _atomic_op3("__intrinsic_atomic_comp_swap",
+ buffer_atomics_supported,
+ glsl_type::int_type),
NULL);
add_function("min3",
@@ -3114,8 +3120,8 @@ builtin_builder::_##NAME(builtin_available_predicate avail, const glsl_type *typ
}
ir_function_signature *
-builtin_builder::binop(ir_expression_operation opcode,
- builtin_available_predicate avail,
+builtin_builder::binop(builtin_available_predicate avail,
+ ir_expression_operation opcode,
const glsl_type *return_type,
const glsl_type *param0_type,
const glsl_type *param1_type)
@@ -3411,7 +3417,7 @@ builtin_builder::_atanh(const glsl_type *type)
ir_function_signature *
builtin_builder::_pow(const glsl_type *type)
{
- return binop(ir_binop_pow, always_available, type, type, type);
+ return binop(always_available, ir_binop_pow, type, type, type);
}
UNOP(exp, ir_unop_exp, always_available)
@@ -3435,7 +3441,7 @@ UNOPA(fract, ir_unop_fract)
ir_function_signature *
builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type)
{
- return binop(ir_binop_mod, always_available, x_type, x_type, y_type);
+ return binop(always_available, ir_binop_mod, x_type, x_type, y_type);
}
ir_function_signature *
@@ -3457,14 +3463,14 @@ ir_function_signature *
builtin_builder::_min(builtin_available_predicate avail,
const glsl_type *x_type, const glsl_type *y_type)
{
- return binop(ir_binop_min, avail, x_type, x_type, y_type);
+ return binop(avail, ir_binop_min, x_type, x_type, y_type);
}
ir_function_signature *
builtin_builder::_max(builtin_available_predicate avail,
const glsl_type *x_type, const glsl_type *y_type)
{
- return binop(ir_binop_max, avail, x_type, x_type, y_type);
+ return binop(avail, ir_binop_max, x_type, x_type, y_type);
}
ir_function_signature *
@@ -3793,9 +3799,9 @@ ir_function_signature *
builtin_builder::_dot(builtin_available_predicate avail, const glsl_type *type)
{
if (type->vector_elements == 1)
- return binop(ir_binop_mul, avail, type, type, type);
+ return binop(avail, ir_binop_mul, type, type, type);
- return binop(ir_binop_dot, avail,
+ return binop(avail, ir_binop_dot,
type->get_base_type(), type, type);
}
@@ -4311,7 +4317,7 @@ ir_function_signature *
builtin_builder::_lessThan(builtin_available_predicate avail,
const glsl_type *type)
{
- return binop(ir_binop_less, avail,
+ return binop(avail, ir_binop_less,
glsl_type::bvec(type->vector_elements), type, type);
}
@@ -4319,7 +4325,7 @@ ir_function_signature *
builtin_builder::_lessThanEqual(builtin_available_predicate avail,
const glsl_type *type)
{
- return binop(ir_binop_lequal, avail,
+ return binop(avail, ir_binop_lequal,
glsl_type::bvec(type->vector_elements), type, type);
}
@@ -4327,7 +4333,7 @@ ir_function_signature *
builtin_builder::_greaterThan(builtin_available_predicate avail,
const glsl_type *type)
{
- return binop(ir_binop_greater, avail,
+ return binop(avail, ir_binop_greater,
glsl_type::bvec(type->vector_elements), type, type);
}
@@ -4335,7 +4341,7 @@ ir_function_signature *
builtin_builder::_greaterThanEqual(builtin_available_predicate avail,
const glsl_type *type)
{
- return binop(ir_binop_gequal, avail,
+ return binop(avail, ir_binop_gequal,
glsl_type::bvec(type->vector_elements), type, type);
}
@@ -4343,7 +4349,7 @@ ir_function_signature *
builtin_builder::_equal(builtin_available_predicate avail,
const glsl_type *type)
{
- return binop(ir_binop_equal, avail,
+ return binop(avail, ir_binop_equal,
glsl_type::bvec(type->vector_elements), type, type);
}
@@ -4351,7 +4357,7 @@ ir_function_signature *
builtin_builder::_notEqual(builtin_available_predicate avail,
const glsl_type *type)
{
- return binop(ir_binop_nequal, avail,
+ return binop(avail, ir_binop_nequal,
glsl_type::bvec(type->vector_elements), type, type);
}
@@ -4939,7 +4945,8 @@ builtin_builder::_fma(builtin_available_predicate avail, const glsl_type *type)
ir_function_signature *
builtin_builder::_ldexp(const glsl_type *x_type, const glsl_type *exp_type)
{
- return binop(ir_binop_ldexp, x_type->base_type == GLSL_TYPE_DOUBLE ? fp64 : gpu_shader5_or_es31, x_type, x_type, exp_type);
+ return binop(x_type->base_type == GLSL_TYPE_DOUBLE ? fp64 : gpu_shader5_or_es31,
+ ir_binop_ldexp, x_type, x_type, exp_type);
}
ir_function_signature *
@@ -5096,8 +5103,8 @@ builtin_builder::_atomic_counter_intrinsic(builtin_available_predicate avail)
}
ir_function_signature *
-builtin_builder::_atomic_ssbo_intrinsic2(builtin_available_predicate avail,
- const glsl_type *type)
+builtin_builder::_atomic_intrinsic2(builtin_available_predicate avail,
+ const glsl_type *type)
{
ir_variable *atomic = in_var(type, "atomic");
ir_variable *data = in_var(type, "data");
@@ -5106,8 +5113,8 @@ builtin_builder::_atomic_ssbo_intrinsic2(builtin_available_predicate avail,
}
ir_function_signature *
-builtin_builder::_atomic_ssbo_intrinsic3(builtin_available_predicate avail,
- const glsl_type *type)
+builtin_builder::_atomic_intrinsic3(builtin_available_predicate avail,
+ const glsl_type *type)
{
ir_variable *atomic = in_var(type, "atomic");
ir_variable *data1 = in_var(type, "data1");
@@ -5131,9 +5138,9 @@ builtin_builder::_atomic_counter_op(const char *intrinsic,
}
ir_function_signature *
-builtin_builder::_atomic_ssbo_op2(const char *intrinsic,
- builtin_available_predicate avail,
- const glsl_type *type)
+builtin_builder::_atomic_op2(const char *intrinsic,
+ builtin_available_predicate avail,
+ const glsl_type *type)
{
ir_variable *atomic = in_var(type, "atomic_var");
ir_variable *data = in_var(type, "atomic_data");
@@ -5147,9 +5154,9 @@ builtin_builder::_atomic_ssbo_op2(const char *intrinsic,
}
ir_function_signature *
-builtin_builder::_atomic_ssbo_op3(const char *intrinsic,
- builtin_available_predicate avail,
- const glsl_type *type)
+builtin_builder::_atomic_op3(const char *intrinsic,
+ builtin_available_predicate avail,
+ const glsl_type *type)
{
ir_variable *atomic = in_var(type, "atomic_var");
ir_variable *data1 = in_var(type, "atomic_data1");
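
[Editor's note] Two things happen in builtin_functions.cpp: binop() now takes the availability predicate first, matching the other builder helpers, and the SSBO-specific atomic builders become generic, gated on the new buffer_atomics_supported predicate. A standalone sketch of that predicate's OR-composition, with an invented toy_state in place of _mesa_glsl_parse_state:

    #include <cstdio>

    struct toy_state {
       bool is_compute;   /* stand-in for compute_shader(state) */
       bool has_ssbo;     /* stand-in for shader_storage_buffer_object(state) */
    };

    static bool compute_shader(const toy_state *s) { return s->is_compute; }
    static bool shader_storage_buffer_object(const toy_state *s) { return s->has_ssbo; }

    /* Mirrors the new predicate: atomicAdd and friends are exposed when
     * either feature is present, since the same built-ins now cover both
     * SSBO and compute-shader shared variables. */
    static bool buffer_atomics_supported(const toy_state *s)
    {
       return compute_shader(s) || shader_storage_buffer_object(s);
    }

    int main()
    {
       toy_state cs = { true, false }, fs = { false, true }, none = { false, false };
       printf("%d %d %d\n", buffer_atomics_supported(&cs),
              buffer_atomics_supported(&fs), buffer_atomics_supported(&none));
       return 0;
    }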
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 5a8f98019d1..7eb383ac60c 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -948,7 +948,7 @@ parameter_qualifier:
if (($1.flags.q.in || $1.flags.q.out) && ($2.flags.q.in || $2.flags.q.out))
_mesa_glsl_error(&@1, state, "duplicate in/out/inout qualifier");
- if (!state->has_420pack() && $2.flags.q.constant)
+ if (!state->has_420pack_or_es31() && $2.flags.q.constant)
_mesa_glsl_error(&@1, state, "in/out/inout must come after const "
"or precise");
@@ -960,7 +960,7 @@ parameter_qualifier:
if ($2.precision != ast_precision_none)
_mesa_glsl_error(&@1, state, "duplicate precision qualifier");
- if (!(state->has_420pack() || state->is_version(420, 310)) &&
+ if (!state->has_420pack_or_es31() &&
$2.flags.i != 0)
_mesa_glsl_error(&@1, state, "precision qualifiers must come last");
@@ -1482,7 +1482,7 @@ layout_qualifier_id:
$$.index = $3;
}
- if ((state->has_420pack() ||
+ if ((state->has_420pack_or_es31() ||
state->has_atomic_counters() ||
state->has_shader_storage_buffer_objects()) &&
match_layout_qualifier("binding", $1, state) == 0) {
@@ -1714,7 +1714,7 @@ type_qualifier:
if ($2.flags.q.invariant)
_mesa_glsl_error(&@1, state, "duplicate \"invariant\" qualifier");
- if (!state->has_420pack() && $2.flags.q.precise)
+ if (!state->has_420pack_or_es31() && $2.flags.q.precise)
_mesa_glsl_error(&@1, state,
"\"invariant\" must come after \"precise\"");
@@ -1747,7 +1747,7 @@ type_qualifier:
if ($2.has_interpolation())
_mesa_glsl_error(&@1, state, "duplicate interpolation qualifier");
- if (!state->has_420pack() &&
+ if (!state->has_420pack_or_es31() &&
($2.flags.q.precise || $2.flags.q.invariant)) {
_mesa_glsl_error(&@1, state, "interpolation qualifiers must come "
"after \"precise\" or \"invariant\"");
@@ -1767,7 +1767,7 @@ type_qualifier:
* precise qualifiers since these are useful in ARB_separate_shader_objects.
* There is no clear spec guidance on this either.
*/
- if (!state->has_420pack() && $2.has_layout())
+ if (!state->has_420pack_or_es31() && $2.has_layout())
_mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers");
$$ = $1;
@@ -1785,7 +1785,7 @@ type_qualifier:
"duplicate auxiliary storage qualifier (centroid or sample)");
}
- if (!state->has_420pack() &&
+ if (!state->has_420pack_or_es31() &&
($2.flags.q.precise || $2.flags.q.invariant ||
$2.has_interpolation() || $2.has_layout())) {
_mesa_glsl_error(&@1, state, "auxiliary storage qualifiers must come "
@@ -1803,7 +1803,7 @@ type_qualifier:
if ($2.has_storage())
_mesa_glsl_error(&@1, state, "duplicate storage qualifier");
- if (!state->has_420pack() &&
+ if (!state->has_420pack_or_es31() &&
($2.flags.q.precise || $2.flags.q.invariant || $2.has_interpolation() ||
$2.has_layout() || $2.has_auxiliary_storage())) {
_mesa_glsl_error(&@1, state, "storage qualifiers must come after "
@@ -1819,7 +1819,7 @@ type_qualifier:
if ($2.precision != ast_precision_none)
_mesa_glsl_error(&@1, state, "duplicate precision qualifier");
- if (!(state->has_420pack() || state->is_version(420, 310)) &&
+ if (!(state->has_420pack_or_es31()) &&
$2.flags.i != 0)
_mesa_glsl_error(&@1, state, "precision qualifiers must come last");
@@ -2575,7 +2575,7 @@ interface_block:
{
ast_interface_block *block = (ast_interface_block *) $2;
- if (!state->has_420pack() && block->layout.has_layout() &&
+ if (!state->has_420pack_or_es31() && block->layout.has_layout() &&
!block->layout.is_default_qualifier) {
_mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers");
YYERROR;
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index b41b64af2c1..3988376ea9d 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -479,7 +479,7 @@ _mesa_glsl_msg(const YYLTYPE *locp, _mesa_glsl_parse_state *state,
struct gl_context *ctx = state->ctx;
/* Report the error via GL_ARB_debug_output. */
- _mesa_shader_debug(ctx, type, &msg_id, msg, strlen(msg));
+ _mesa_shader_debug(ctx, type, &msg_id, msg);
ralloc_strcat(&state->info_log, "\n");
}
@@ -876,7 +876,7 @@ void
_mesa_ast_process_interface_block(YYLTYPE *locp,
_mesa_glsl_parse_state *state,
ast_interface_block *const block,
- const struct ast_type_qualifier q)
+ const struct ast_type_qualifier &q)
{
if (q.flags.q.buffer) {
if (!state->has_shader_storage_buffer_objects()) {
@@ -1088,7 +1088,7 @@ void
ast_compound_statement::print(void) const
{
printf("{\n");
-
+
foreach_list_typed(ast_node, ast, link, &this->statements) {
ast->print();
}
@@ -1414,7 +1414,6 @@ ast_selection_statement::print(void) const
printf("else ");
else_statement->print();
}
-
}
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 17ff0b5af79..a4bda772a0f 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -97,7 +97,7 @@ struct _mesa_glsl_parse_state {
* supports the feature.
*
* \param required_glsl_es_version is the GLSL ES version that is required
- * to support the feature, or 0 if no version of GLSL ES suports the
+ * to support the feature, or 0 if no version of GLSL ES supports the
* feature.
*/
bool is_version(unsigned required_glsl_version,
@@ -255,6 +255,11 @@ struct _mesa_glsl_parse_state {
return ARB_shading_language_420pack_enable || is_version(420, 0);
}
+ bool has_420pack_or_es31() const
+ {
+ return ARB_shading_language_420pack_enable || is_version(420, 310);
+ }
+
bool has_compute_shader() const
{
return ARB_compute_shader_enable || is_version(430, 310);
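
[Editor's note] The new helper folds the recurring `has_420pack() || is_version(420, 310)` test at the parser call sites into one call. A self-contained sketch of the two predicates side by side, with a toy state struct and simplified version handling (the real is_version() lives on _mesa_glsl_parse_state):

    #include <cstdio>

    struct toy_state {
       bool ARB_shading_language_420pack_enable;
       unsigned glsl_version;      /* desktop GLSL version, 0 if ES */
       unsigned glsl_es_version;   /* GLSL ES version, 0 if desktop */
    };

    static bool is_version(const toy_state *s, unsigned gl, unsigned es)
    {
       return (s->glsl_version && s->glsl_version >= gl) ||
              (es && s->glsl_es_version && s->glsl_es_version >= es);
    }

    /* has_420pack(): the extension, or desktop GLSL >= 4.20 (never ES). */
    static bool has_420pack(const toy_state *s)
    { return s->ARB_shading_language_420pack_enable || is_version(s, 420, 0); }

    /* has_420pack_or_es31(): additionally true for GLSL ES >= 3.10, where
     * the relaxed qualifier-ordering rules also apply. */
    static bool has_420pack_or_es31(const toy_state *s)
    { return s->ARB_shading_language_420pack_enable || is_version(s, 420, 310); }

    int main()
    {
       toy_state es31 = { false, 0, 310 };
       printf("%d %d\n", has_420pack(&es31), has_420pack_or_es31(&es31)); /* 0 1 */
       return 0;
    }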
diff --git a/src/glsl/hir_field_selection.cpp b/src/glsl/hir_field_selection.cpp
index 337095b95b8..92bb4139194 100644
--- a/src/glsl/hir_field_selection.cpp
+++ b/src/glsl/hir_field_selection.cpp
@@ -57,8 +57,7 @@ _mesa_ast_field_selection_to_hir(const ast_expression *expr,
expr->primary_expression.identifier);
}
} else if (op->type->is_vector() ||
- (state->ARB_shading_language_420pack_enable &&
- op->type->is_scalar())) {
+ (state->has_420pack() && op->type->is_scalar())) {
ir_swizzle *swiz = ir_swizzle::create(op,
expr->primary_expression.identifier,
op->type->vector_elements);
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index ca520f547a1..f989e9b6dff 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1669,6 +1669,7 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name,
this->data.pixel_center_integer = false;
this->data.depth_layout = ir_depth_layout_none;
this->data.used = false;
+ this->data.always_active_io = false;
this->data.read_only = false;
this->data.centroid = false;
this->data.sample = false;
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index e1109eec1d3..bdc932ef538 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -659,6 +659,13 @@ public:
unsigned assigned:1;
/**
+ * When separate shader programs are enabled, only input/outputs between
+ * the stages of a multi-stage separate program can be safely removed
+ * from the shader interface. Other inputs/outputs must remain active.
+ */
+ unsigned always_active_io:1;
+
+ /**
* Enum indicating how the variable was declared. See
* ir_var_declaration_type.
*
diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp
index 67ed3605a8c..ef705851613 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -41,14 +41,6 @@
#include "glsl_types.h"
#include "program/hash_table.h"
-#if defined(__SUNPRO_CC) && !defined(isnormal)
-#include <ieeefp.h>
-static int isnormal(double x)
-{
- return fpclass(x) == FP_NORMAL;
-}
-#endif
-
static float
dot_f(ir_constant *op0, ir_constant *op1)
{
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 2fee81c09c2..dabd80a8d0d 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -124,6 +124,7 @@ bool lower_const_arrays_to_uniforms(exec_list *instructions);
bool lower_clip_distance(gl_shader *shader);
void lower_output_reads(unsigned stage, exec_list *instructions);
bool lower_packing_builtins(exec_list *instructions, int op_mask);
+void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size);
void lower_ubo_reference(struct gl_shader *shader);
void lower_packed_varyings(void *mem_ctx,
unsigned locations_used, ir_variable_mode mode,
diff --git a/src/glsl/ir_reader.cpp b/src/glsl/ir_reader.cpp
index 07720e28749..7c0af1b712f 100644
--- a/src/glsl/ir_reader.cpp
+++ b/src/glsl/ir_reader.cpp
@@ -93,7 +93,7 @@ ir_reader::read(exec_list *instructions, const char *src, bool scan_for_protos)
ir_read_error(NULL, "couldn't parse S-Expression.");
return;
}
-
+
if (scan_for_protos) {
scan_for_prototypes(instructions, expr);
if (state->error)
@@ -147,7 +147,7 @@ ir_reader::read_type(s_expression *expr)
return glsl_type::get_array_instance(base_type, s_size->value());
}
-
+
s_symbol *type_sym = SX_AS_SYMBOL(expr);
if (type_sym == NULL) {
ir_read_error(expr, "expected <type>");
diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp
index c0b4b3e820c..71750d1b42b 100644
--- a/src/glsl/link_varyings.cpp
+++ b/src/glsl/link_varyings.cpp
@@ -766,7 +766,7 @@ public:
gl_shader_stage consumer_stage);
~varying_matches();
void record(ir_variable *producer_var, ir_variable *consumer_var);
- unsigned assign_locations(uint64_t reserved_slots);
+ unsigned assign_locations(uint64_t reserved_slots, bool separate_shader);
void store_locations() const;
private:
@@ -896,8 +896,10 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
{
assert(producer_var != NULL || consumer_var != NULL);
- if ((producer_var && !producer_var->data.is_unmatched_generic_inout)
- || (consumer_var && !consumer_var->data.is_unmatched_generic_inout)) {
+ if ((producer_var && (!producer_var->data.is_unmatched_generic_inout ||
+ producer_var->data.explicit_location)) ||
+ (consumer_var && (!consumer_var->data.is_unmatched_generic_inout ||
+ consumer_var->data.explicit_location))) {
/* Either a location already exists for this variable (since it is part
* of fixed functionality), or it has already been recorded as part of a
* previous match.
@@ -986,11 +988,36 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
* passed to varying_matches::record().
*/
unsigned
-varying_matches::assign_locations(uint64_t reserved_slots)
+varying_matches::assign_locations(uint64_t reserved_slots, bool separate_shader)
{
- /* Sort varying matches into an order that makes them easy to pack. */
- qsort(this->matches, this->num_matches, sizeof(*this->matches),
- &varying_matches::match_comparator);
+ /* We disable varying sorting for separate shader programs for the
+ * following reasons:
+ *
+ * 1/ All programs must sort their varyings in the same order to guarantee
+ * interface matching. However, varying_matches::record() changes the
+ * interpolation qualifier of some stages.
+ *
+ * 2/ GLSL version 4.50 removes the matching constraint on the interpolation
+ * qualifier.
+ *
+ * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.40 spec:
+ *
+ * "The type and presence of interpolation qualifiers of variables with
+ * the same name declared in all linked shaders for the same cross-stage
+ * interface must match, otherwise the link command will fail.
+ *
+ * When comparing an output from one stage to an input of a subsequent
+ * stage, the input and output don't match if their interpolation
+ * qualifiers (or lack thereof) are not the same."
+ *
+ * "It is a link-time error if, within the same stage, the interpolation
+ * qualifiers of variables of the same name do not match."
+ */
+ if (!separate_shader) {
+ /* Sort varying matches into an order that makes them easy to pack. */
+ qsort(this->matches, this->num_matches, sizeof(*this->matches),
+ &varying_matches::match_comparator);
+ }
unsigned generic_location = 0;
unsigned generic_patch_location = MAX_VARYING*4;
@@ -1590,7 +1617,8 @@ assign_varying_locations(struct gl_context *ctx,
reserved_varying_slot(producer, ir_var_shader_out) |
reserved_varying_slot(consumer, ir_var_shader_in);
- const unsigned slots_used = matches.assign_locations(reserved_slots);
+ const unsigned slots_used = matches.assign_locations(reserved_slots,
+ prog->SeparateShader);
matches.store_locations();
for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
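
[Editor's note] Why the sort is disabled for separable programs: the comparator keys off properties that varying_matches::record() may have rewritten differently in each separately linked program, so two programs can assign different locations to the same-named varying. A toy model of the failure and the fix (invented names and comparator, not Mesa's):

    #include <cstdio>
    #include <cstdlib>
    #include <cstring>

    struct varying { const char *name; int packing_class; int location; };

    static int by_packing(const void *a, const void *b)
    {
       return ((const varying *)a)->packing_class -
              ((const varying *)b)->packing_class;
    }

    /* Locations are handed out in traversal order after an optional sort. */
    static void assign(varying *v, int n, bool separate_shader)
    {
       if (!separate_shader)   /* mirrors the guarded qsort in the hunk above */
          qsort(v, n, sizeof(*v), by_packing);
       for (int i = 0; i < n; i++)
          v[i].location = i;
    }

    static int loc_of(const varying *v, int n, const char *name)
    {
       for (int i = 0; i < n; i++)
          if (!strcmp(v[i].name, name))
             return v[i].location;
       return -1;
    }

    int main()
    {
       /* Same interface, but record() left "a" with a different packing
        * class in each separately linked program. */
       varying prog1[] = {{"a", 2, -1}, {"b", 1, -1}};
       varying prog2[] = {{"a", 1, -1}, {"b", 2, -1}};
       assign(prog1, 2, false);
       assign(prog2, 2, false);
       printf("sorted:   a=%d vs a=%d\n",
              loc_of(prog1, 2, "a"), loc_of(prog2, 2, "a")); /* 1 vs 0: mismatch */

       varying prog3[] = {{"a", 2, -1}, {"b", 1, -1}};
       varying prog4[] = {{"a", 1, -1}, {"b", 2, -1}};
       assign(prog3, 2, true);    /* declaration order on both sides */
       assign(prog4, 2, true);
       printf("unsorted: a=%d vs a=%d\n",
              loc_of(prog3, 2, "a"), loc_of(prog4, 2, "a")); /* 0 vs 0: match */
       return 0;
    }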
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 331d9a28007..a87bbb2b994 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -631,20 +631,12 @@ link_invalidate_variable_locations(exec_list *ir)
/* ir_variable::is_unmatched_generic_inout is used by the linker while
* connecting outputs from one stage to inputs of the next stage.
- *
- * There are two implicit assumptions here. First, we assume that any
- * built-in variable (i.e., non-generic in or out) will have
- * explicit_location set. Second, we assume that any generic in or out
- * will not have explicit_location set.
- *
- * This second assumption will only be valid until
- * GL_ARB_separate_shader_objects is supported. When that extension is
- * implemented, this function will need some modifications.
*/
- if (!var->data.explicit_location) {
- var->data.is_unmatched_generic_inout = 1;
- } else {
+ if (var->data.explicit_location &&
+ var->data.location < VARYING_SLOT_VAR0) {
var->data.is_unmatched_generic_inout = 0;
+ } else {
+ var->data.is_unmatched_generic_inout = 1;
}
}
}
@@ -2421,6 +2413,7 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
continue;
if (var->data.explicit_location) {
+ var->data.is_unmatched_generic_inout = 0;
if ((var->data.location >= (int)(max_index + generic_base))
|| (var->data.location < 0)) {
linker_error(prog,
@@ -2690,6 +2683,53 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
return true;
}
+/**
+ * Match explicit locations of outputs to inputs and clear the unmatched
+ * flag when a match is found so we don't optimize them away.
+ */
+static void
+match_explicit_outputs_to_inputs(struct gl_shader_program *prog,
+ gl_shader *producer,
+ gl_shader *consumer)
+{
+ glsl_symbol_table parameters;
+ ir_variable *explicit_locations[MAX_VARYING] = { NULL };
+
+ /* Find all shader outputs in the "producer" stage.
+ */
+ foreach_in_list(ir_instruction, node, producer->ir) {
+ ir_variable *const var = node->as_variable();
+
+ if ((var == NULL) || (var->data.mode != ir_var_shader_out))
+ continue;
+
+ if (var->data.explicit_location &&
+ var->data.location >= VARYING_SLOT_VAR0) {
+ const unsigned idx = var->data.location - VARYING_SLOT_VAR0;
+ if (explicit_locations[idx] == NULL)
+ explicit_locations[idx] = var;
+ }
+ }
+
+ /* Match inputs to outputs */
+ foreach_in_list(ir_instruction, node, consumer->ir) {
+ ir_variable *const input = node->as_variable();
+
+ if ((input == NULL) || (input->data.mode != ir_var_shader_in))
+ continue;
+
+ ir_variable *output = NULL;
+ if (input->data.explicit_location
+ && input->data.location >= VARYING_SLOT_VAR0) {
+ output = explicit_locations[input->data.location - VARYING_SLOT_VAR0];
+
+ if (output != NULL) {
+ input->data.is_unmatched_generic_inout = 0;
+ output->data.is_unmatched_generic_inout = 0;
+ }
+ }
+ }
+}
/**
* Demote shader inputs and outputs that are not used in other stages
@@ -3940,6 +3980,77 @@ split_ubos_and_ssbos(void *mem_ctx,
assert(*num_ubos + *num_ssbos == num_blocks);
}
+static void
+set_always_active_io(exec_list *ir, ir_variable_mode io_mode)
+{
+ assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out);
+
+ foreach_in_list(ir_instruction, node, ir) {
+ ir_variable *const var = node->as_variable();
+
+ if (var == NULL || var->data.mode != io_mode)
+ continue;
+
+ /* Don't set always active on builtins that haven't been redeclared */
+ if (var->data.how_declared == ir_var_declared_implicitly)
+ continue;
+
+ var->data.always_active_io = true;
+ }
+}
+
+/**
+ * When separate shader programs are enabled, only input/outputs between
+ * the stages of a multi-stage separate program can be safely removed
+ * from the shader interface. Other inputs/outputs must remain active.
+ */
+static void
+disable_varying_optimizations_for_sso(struct gl_shader_program *prog)
+{
+ unsigned first, last;
+ assert(prog->SeparateShader);
+
+ first = MESA_SHADER_STAGES;
+ last = 0;
+
+ /* Determine first and last stage, excluding the compute stage. */
+ for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
+ if (!prog->_LinkedShaders[i])
+ continue;
+ if (first == MESA_SHADER_STAGES)
+ first = i;
+ last = i;
+ }
+
+ if (first == MESA_SHADER_STAGES)
+ return;
+
+ for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
+ gl_shader *sh = prog->_LinkedShaders[stage];
+ if (!sh)
+ continue;
+
+ if (first == last) {
+ /* For a single shader program only allow inputs to the vertex shader
+ * and outputs from the fragment shader to be removed.
+ */
+ if (stage != MESA_SHADER_VERTEX)
+ set_always_active_io(sh->ir, ir_var_shader_in);
+ if (stage != MESA_SHADER_FRAGMENT)
+ set_always_active_io(sh->ir, ir_var_shader_out);
+ } else {
+ /* For multi-stage separate shader programs only allow inputs and
+ * outputs between the shader stages to be removed as well as inputs
+ * to the vertex shader and outputs from the fragment shader.
+ */
+ if (stage == first && stage != MESA_SHADER_VERTEX)
+ set_always_active_io(sh->ir, ir_var_shader_in);
+ else if (stage == last && stage != MESA_SHADER_FRAGMENT)
+ set_always_active_io(sh->ir, ir_var_shader_out);
+ }
+ }
+}
+
void
link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
{
@@ -4139,11 +4250,18 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
if (!prog->LinkStatus)
goto done;
- unsigned prev;
+ unsigned first, last, prev;
- for (prev = 0; prev <= MESA_SHADER_FRAGMENT; prev++) {
- if (prog->_LinkedShaders[prev] != NULL)
- break;
+ first = MESA_SHADER_STAGES;
+ last = 0;
+
+ /* Determine first and last stage. */
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (!prog->_LinkedShaders[i])
+ continue;
+ if (first == MESA_SHADER_STAGES)
+ first = i;
+ last = i;
}
check_explicit_uniform_locations(ctx, prog);
@@ -4157,6 +4275,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
/* Validate the inputs of each stage with the output of the preceding
* stage.
*/
+ prev = first;
for (unsigned i = prev + 1; i <= MESA_SHADER_FRAGMENT; i++) {
if (prog->_LinkedShaders[i] == NULL)
continue;
@@ -4199,6 +4318,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
}
}
+ if (prog->SeparateShader)
+ disable_varying_optimizations_for_sso(prog);
+
if (!interstage_cross_validate_uniform_blocks(prog))
goto done;
@@ -4250,6 +4372,16 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
}
}
+ prev = first;
+ for (unsigned i = prev + 1; i <= MESA_SHADER_FRAGMENT; i++) {
+ if (prog->_LinkedShaders[i] == NULL)
+ continue;
+
+ match_explicit_outputs_to_inputs(prog, prog->_LinkedShaders[prev],
+ prog->_LinkedShaders[i]);
+ prev = i;
+ }
+
if (!assign_attribute_or_color_locations(prog, &ctx->Const,
MESA_SHADER_VERTEX)) {
goto done;
@@ -4260,20 +4392,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
goto done;
}
- unsigned first, last;
-
- first = MESA_SHADER_STAGES;
- last = 0;
-
- /* Determine first and last stage. */
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- if (!prog->_LinkedShaders[i])
- continue;
- if (first == MESA_SHADER_STAGES)
- first = i;
- last = i;
- }
-
if (num_tfeedback_decls != 0) {
/* From GL_EXT_transform_feedback:
* A program will fail to link if:
@@ -4333,13 +4451,14 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
do_dead_builtin_varyings(ctx, sh, NULL,
num_tfeedback_decls, tfeedback_decls);
- if (!prog->SeparateShader)
+ if (!prog->SeparateShader) {
demote_shader_inputs_and_outputs(sh, ir_var_shader_out);
-
- /* Eliminate code that is now dead due to unused outputs being demoted.
- */
- while (do_dead_code(sh->ir, false))
- ;
+ /* Eliminate code that is now dead due to unused outputs being
+ * demoted.
+ */
+ while (do_dead_code(sh->ir, false))
+ ;
+ }
}
else if (first == MESA_SHADER_FRAGMENT) {
/* If the program only contains a fragment shader...
@@ -4356,11 +4475,14 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
0 /* num_tfeedback_decls */,
NULL /* tfeedback_decls */))
goto done;
- } else
+ } else {
demote_shader_inputs_and_outputs(sh, ir_var_shader_in);
-
- while (do_dead_code(sh->ir, false))
- ;
+ /* Eliminate code that is now dead due to unused inputs being
+ * demoted.
+ */
+ while (do_dead_code(sh->ir, false))
+ ;
+ }
}
next = last;
@@ -4485,6 +4607,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
if (ctx->Const.ShaderCompilerOptions[i].LowerBufferInterfaceBlocks)
lower_ubo_reference(prog->_LinkedShaders[i]);
+ if (ctx->Const.ShaderCompilerOptions[i].LowerShaderSharedVariables)
+ lower_shared_reference(prog->_LinkedShaders[i],
+ &prog->Comp.SharedSize);
+
lower_vector_derefs(prog->_LinkedShaders[i]);
}
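
[Editor's note] Net effect of disable_varying_optimizations_for_sso(): in a separable program, only varyings crossing the program's internal stage boundaries stay eligible for removal; first-stage inputs and last-stage outputs are pinned unless they are the fixed VS-attribute / FS-output boundary. A toy walk over the branch structure (stage names and layout invented for the sketch):

    #include <cstdio>

    enum stage { VS, TCS, TES, GS, FS, NSTAGES };
    static const char *names[] = { "VS", "TCS", "TES", "GS", "FS" };

    /* Mirrors the first/last scan and the pinning branches above. */
    static void report(const bool present[NSTAGES])
    {
       int first = NSTAGES, last = -1;
       for (int i = 0; i < NSTAGES; i++) {
          if (!present[i]) continue;
          if (first == NSTAGES) first = i;
          last = i;
       }
       for (int i = 0; i < NSTAGES; i++) {
          if (!present[i]) continue;
          bool pin_in  = (i == first && i != VS);
          bool pin_out = (i == last && i != FS);
          if (first == last) { pin_in = (i != VS); pin_out = (i != FS); }
          printf("%s: inputs %s, outputs %s\n", names[i],
                 pin_in ? "pinned" : "removable",
                 pin_out ? "pinned" : "removable");
       }
    }

    int main()
    {
       bool vs_gs[NSTAGES] = { true, false, false, true, false };
       report(vs_gs);   /* VS inputs removable, GS outputs pinned; the
                         * internal VS->GS varyings remain removable. */
       return 0;
    }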
diff --git a/src/glsl/list.h b/src/glsl/list.h
index 15fcd4abd1c..a1c4d82b017 100644
--- a/src/glsl/list.h
+++ b/src/glsl/list.h
@@ -688,7 +688,7 @@ inline void exec_node::insert_before(exec_list *before)
__node = __next, __next = \
exec_node_data(__type, (__next)->__field.next, __field))
-#define foreach_list_typed_safe_reverse(__type, __node, __field, __list) \
+#define foreach_list_typed_reverse_safe(__type, __node, __field, __list) \
for (__type * __node = \
exec_node_data(__type, (__list)->tail_pred, __field), \
* __prev = \
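
[Editor's note] The list.h hunk only renames the macro so the modifiers read in the same order as the forward iterators (reverse, then safe). The pattern itself, caching the previous node so the current one can be unlinked and freed mid-walk, can be sketched with a tiny standalone list (invented node type, not Mesa's exec_node):

    #include <cstdio>
    #include <cstdlib>

    struct node { int value; node *prev, *next; };

    /* Reverse-safe walk: the previous node is cached before the body runs,
     * so the body may unlink and free the current node. */
    #define foreach_reverse_safe(n, prev_n, tail)                        \
       for (node *n = (tail), *prev_n = n ? n->prev : NULL; n != NULL;   \
            n = prev_n, prev_n = n ? n->prev : NULL)

    int main()
    {
       node *head = NULL, *tail = NULL;
       for (int i = 1; i <= 4; i++) {
          node *n = (node *)calloc(1, sizeof(node));
          n->value = i;
          n->prev = tail;
          if (tail) tail->next = n; else head = n;
          tail = n;
       }

       foreach_reverse_safe(n, prev_n, tail) {
          if (n->value % 2 == 0) {            /* unlink and free even nodes */
             if (n->prev) n->prev->next = n->next; else head = n->next;
             if (n->next) n->next->prev = n->prev; else tail = n->prev;
             free(n);                          /* safe: prev_n already cached */
          }
       }

       for (node *n = head; n; n = n->next)
          printf("%d ", n->value);             /* prints: 1 3 */
       printf("\n");
       return 0;
    }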
diff --git a/src/glsl/lower_buffer_access.cpp b/src/glsl/lower_buffer_access.cpp
new file mode 100644
index 00000000000..f8c8d140ea8
--- /dev/null
+++ b/src/glsl/lower_buffer_access.cpp
@@ -0,0 +1,490 @@
+/*
+ * Copyright (c) 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_buffer_access.cpp
+ *
+ * Helper for IR lowering pass to replace dereferences of buffer object based
+ * shader variables with intrinsic function calls.
+ *
+ * This helper is used by lowering passes for UBOs, SSBOs and compute shader
+ * shared variables.
+ */
+
+#include "lower_buffer_access.h"
+#include "ir_builder.h"
+#include "main/macros.h"
+#include "util/list.h"
+#include "glsl_parser_extras.h"
+
+using namespace ir_builder;
+
+namespace lower_buffer_access {
+
+static inline int
+writemask_for_size(unsigned n)
+{
+ return ((1 << n) - 1);
+}
+
+/**
+ * Takes a deref and recursively calls itself to break the deref down to the
+ * point that the reads or writes generated are contiguous scalars or vectors.
+ */
+void
+lower_buffer_access::emit_access(void *mem_ctx,
+ bool is_write,
+ ir_dereference *deref,
+ ir_variable *base_offset,
+ unsigned int deref_offset,
+ bool row_major,
+ int matrix_columns,
+ unsigned int packing,
+ unsigned int write_mask)
+{
+ if (deref->type->is_record()) {
+ unsigned int field_offset = 0;
+
+ for (unsigned i = 0; i < deref->type->length; i++) {
+ const struct glsl_struct_field *field =
+ &deref->type->fields.structure[i];
+ ir_dereference *field_deref =
+ new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL),
+ field->name);
+
+ field_offset =
+ glsl_align(field_offset,
+ field->type->std140_base_alignment(row_major));
+
+ emit_access(mem_ctx, is_write, field_deref, base_offset,
+ deref_offset + field_offset,
+ row_major, 1, packing,
+ writemask_for_size(field_deref->type->vector_elements));
+
+ field_offset += field->type->std140_size(row_major);
+ }
+ return;
+ }
+
+ if (deref->type->is_array()) {
+ unsigned array_stride = packing == GLSL_INTERFACE_PACKING_STD430 ?
+ deref->type->fields.array->std430_array_stride(row_major) :
+ glsl_align(deref->type->fields.array->std140_size(row_major), 16);
+
+ for (unsigned i = 0; i < deref->type->length; i++) {
+ ir_constant *element = new(mem_ctx) ir_constant(i);
+ ir_dereference *element_deref =
+ new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL),
+ element);
+ emit_access(mem_ctx, is_write, element_deref, base_offset,
+ deref_offset + i * array_stride,
+ row_major, 1, packing,
+ writemask_for_size(element_deref->type->vector_elements));
+ }
+ return;
+ }
+
+ if (deref->type->is_matrix()) {
+ for (unsigned i = 0; i < deref->type->matrix_columns; i++) {
+ ir_constant *col = new(mem_ctx) ir_constant(i);
+ ir_dereference *col_deref =
+ new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), col);
+
+ if (row_major) {
+ /* For a row-major matrix, the next column starts at the next
+ * element.
+ */
+ int size_mul = deref->type->is_double() ? 8 : 4;
+ emit_access(mem_ctx, is_write, col_deref, base_offset,
+ deref_offset + i * size_mul,
+ row_major, deref->type->matrix_columns, packing,
+ writemask_for_size(col_deref->type->vector_elements));
+ } else {
+ int size_mul;
+
+ /* std430 doesn't round up vec2 size to a vec4 size */
+ if (packing == GLSL_INTERFACE_PACKING_STD430 &&
+ deref->type->vector_elements == 2 &&
+ !deref->type->is_double()) {
+ size_mul = 8;
+ } else {
+ /* std140 always rounds the stride of arrays (and matrices) to a
+ * vec4, so matrices are always 16 between columns/rows. With
+ * doubles, they will be 32 apart when there are more than 2 rows.
+ *
+ * For both std140 and std430, if the member is a
+ * three-component vector with components consuming N basic
+ * machine units, the base alignment is 4N. For vec4, base
+ * alignment is 4N.
+ */
+ size_mul = (deref->type->is_double() &&
+ deref->type->vector_elements > 2) ? 32 : 16;
+ }
+
+ emit_access(mem_ctx, is_write, col_deref, base_offset,
+ deref_offset + i * size_mul,
+ row_major, deref->type->matrix_columns, packing,
+ writemask_for_size(col_deref->type->vector_elements));
+ }
+ }
+ return;
+ }
+
+ assert(deref->type->is_scalar() || deref->type->is_vector());
+
+ if (!row_major) {
+ ir_rvalue *offset =
+ add(base_offset, new(mem_ctx) ir_constant(deref_offset));
+ unsigned mask =
+ is_write ? write_mask : (1 << deref->type->vector_elements) - 1;
+ insert_buffer_access(mem_ctx, deref, deref->type, offset, mask, -1);
+ } else {
+ unsigned N = deref->type->is_double() ? 8 : 4;
+
+ /* We're dereffing a column out of a row-major matrix, so we
+ * gather the vector from each stored row.
+ */
+ assert(deref->type->base_type == GLSL_TYPE_FLOAT ||
+ deref->type->base_type == GLSL_TYPE_DOUBLE);
+ /* Matrices, row_major or not, are stored as if they were
+ * arrays of vectors of the appropriate size in std140.
+ * Arrays have their strides rounded up to a vec4, so the
+ * matrix stride is always 16. However a double matrix may either be 16
+ * or 32 depending on the number of columns.
+ */
+ assert(matrix_columns <= 4);
+ unsigned matrix_stride = 0;
+ /* The matrix stride for std430 mat2xY matrices is not rounded up to
+ * vec4 size. From OpenGL 4.3 spec, section 7.6.2.2 "Standard Uniform
+ * Block Layout":
+ *
+ * "2. If the member is a two- or four-component vector with components
+ * consuming N basic machine units, the base alignment is 2N or 4N,
+ * respectively." [...]
+ * "4. If the member is an array of scalars or vectors, the base alignment
+ * and array stride are set to match the base alignment of a single array
+ * element, according to rules (1), (2), and (3), and rounded up to the
+ * base alignment of a vec4." [...]
+ * "7. If the member is a row-major matrix with C columns and R rows, the
+ * matrix is stored identically to an array of R row vectors with C
+ * components each, according to rule (4)." [...]
+ * "When using the std430 storage layout, shader storage blocks will be
+ * laid out in buffer storage identically to uniform and shader storage
+ * blocks using the std140 layout, except that the base alignment and
+ * stride of arrays of scalars and vectors in rule 4 and of structures in
+ * rule 9 are not rounded up a multiple of the base alignment of a vec4."
+ */
+ if (packing == GLSL_INTERFACE_PACKING_STD430 && matrix_columns == 2)
+ matrix_stride = 2 * N;
+ else
+ matrix_stride = glsl_align(matrix_columns * N, 16);
+
+ const glsl_type *deref_type = deref->type->base_type == GLSL_TYPE_FLOAT ?
+ glsl_type::float_type : glsl_type::double_type;
+
+ for (unsigned i = 0; i < deref->type->vector_elements; i++) {
+ ir_rvalue *chan_offset =
+ add(base_offset,
+ new(mem_ctx) ir_constant(deref_offset + i * matrix_stride));
+ if (!is_write || ((1U << i) & write_mask))
+ insert_buffer_access(mem_ctx, deref, deref_type, chan_offset,
+ (1U << i), i);
+ }
+ }
+}
+
+/**
+ * Determine if a thing being dereferenced is row-major
+ *
+ * There is some trickery here.
+ *
+ * If the thing being dereferenced is a member of uniform block \b without an
+ * instance name, then the name of the \c ir_variable is the field name of an
+ * interface type. If this field is row-major, then the thing referenced is
+ * row-major.
+ *
+ * If the thing being dereferenced is a member of uniform block \b with an
+ * instance name, then the last dereference in the tree will be an
+ * \c ir_dereference_record. If that record field is row-major, then the
+ * thing referenced is row-major.
+ */
+bool
+lower_buffer_access::is_dereferenced_thing_row_major(const ir_rvalue *deref)
+{
+ bool matrix = false;
+ const ir_rvalue *ir = deref;
+
+ while (true) {
+ matrix = matrix || ir->type->without_array()->is_matrix();
+
+ switch (ir->ir_type) {
+ case ir_type_dereference_array: {
+ const ir_dereference_array *const array_deref =
+ (const ir_dereference_array *) ir;
+
+ ir = array_deref->array;
+ break;
+ }
+
+ case ir_type_dereference_record: {
+ const ir_dereference_record *const record_deref =
+ (const ir_dereference_record *) ir;
+
+ ir = record_deref->record;
+
+ const int idx = ir->type->field_index(record_deref->field);
+ assert(idx >= 0);
+
+ const enum glsl_matrix_layout matrix_layout =
+ glsl_matrix_layout(ir->type->fields.structure[idx].matrix_layout);
+
+ switch (matrix_layout) {
+ case GLSL_MATRIX_LAYOUT_INHERITED:
+ break;
+ case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR:
+ return false;
+ case GLSL_MATRIX_LAYOUT_ROW_MAJOR:
+ return matrix || deref->type->without_array()->is_record();
+ }
+
+ break;
+ }
+
+ case ir_type_dereference_variable: {
+ const ir_dereference_variable *const var_deref =
+ (const ir_dereference_variable *) ir;
+
+ const enum glsl_matrix_layout matrix_layout =
+ glsl_matrix_layout(var_deref->var->data.matrix_layout);
+
+ switch (matrix_layout) {
+ case GLSL_MATRIX_LAYOUT_INHERITED: {
+ /* For interface block matrix variables we handle inherited
+ * layouts at HIR generation time, but we don't do that for shared
+ * variables, which are always column-major
+ */
+ ir_variable *var = deref->variable_referenced();
+ assert((var->is_in_buffer_block() && !matrix) ||
+ var->data.mode == ir_var_shader_shared);
+ return false;
+ }
+ case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR:
+ return false;
+ case GLSL_MATRIX_LAYOUT_ROW_MAJOR:
+ return matrix || deref->type->without_array()->is_record();
+ }
+
+ unreachable("invalid matrix layout");
+ break;
+ }
+
+ default:
+ return false;
+ }
+ }
+
+ /* The tree must have ended with a dereference that wasn't an
+ * ir_dereference_variable. That is invalid, and it should be impossible.
+ */
+ unreachable("invalid dereference tree");
+ return false;
+}
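
/* Editorial illustration, hypothetical GLSL: the two cases described in the
 * comment above.
 *
 *    layout(row_major) uniform blk { mat4 m; };       // no instance name:
 *    m[1]      the walk bottoms out at ir_dereference_variable("m"); the
 *              layout is read from var->data.matrix_layout.
 *
 *    uniform blk2 { layout(row_major) mat4 m; } b;    // instance name:
 *    b.m[1]    the walk hits ir_dereference_record(field "m"); the layout
 *              is read from the interface type's field.
 */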
+
+/**
+ * This function initializes various values that will be used later by
+ * emit_access when actually emitting loads or stores.
+ *
+ * Note: const_offset is an input as well as an output; callers must
+ * initialize it to the offset of the variable in the underlying block, and
+ * this function will adjust it by adding the constant offset of the member
+ * of that variable being accessed.
+ */
+void
+lower_buffer_access::setup_buffer_access(void *mem_ctx,
+ ir_variable *var,
+ ir_rvalue *deref,
+ ir_rvalue **offset,
+ unsigned *const_offset,
+ bool *row_major,
+ int *matrix_columns,
+ unsigned packing)
+{
+ *offset = new(mem_ctx) ir_constant(0u);
+ *row_major = is_dereferenced_thing_row_major(deref);
+ *matrix_columns = 1;
+
+ /* Calculate the offset to the start of the region of the buffer
+ * dereferenced by \c deref. This may be a variable offset if an
+ * array dereference has a variable index.
+ */
+ while (deref) {
+ switch (deref->ir_type) {
+ case ir_type_dereference_variable: {
+ deref = NULL;
+ break;
+ }
+
+ case ir_type_dereference_array: {
+ ir_dereference_array *deref_array = (ir_dereference_array *) deref;
+ unsigned array_stride;
+ if (deref_array->array->type->is_vector()) {
+ /* We get this when storing or loading a component out of a vector
+ * with a non-constant index. This happens for v[i] = f where v is
+ * a vector (or m[i][j] = f where m is a matrix). If we don't
+ * lower that here, it gets turned into v = vector_insert(v, i,
+ * f), which loads the entire vector, modifies one component and
+ * then writes the entire thing back. That breaks if another
+ * thread or SIMD channel is modifying the same vector.
+ */
+ array_stride = 4;
+ if (deref_array->array->type->is_double())
+ array_stride *= 2;
+ } else if (deref_array->array->type->is_matrix() && *row_major) {
+ /* When loading a vector out of a row major matrix, the
+ * step between the columns (vectors) is the size of a
+ * float, while the step between the rows (elements of a
+ * vector) is handled above in emit_access.
+ */
+ array_stride = 4;
+ if (deref_array->array->type->is_double())
+ array_stride *= 2;
+ *matrix_columns = deref_array->array->type->matrix_columns;
+ } else if (deref_array->type->without_array()->is_interface()) {
+ /* We're processing an array dereference of an interface instance
+ * array. The thing being dereferenced *must* be a variable
+ * dereference because interfaces cannot be embedded in other
+ * types. In terms of calculating the offsets for the lowering
+ * pass, we don't care about the array index. All elements of an
+ * interface instance array will have the same offsets relative to
+ * the base of the block that backs them.
+ */
+ deref = deref_array->array->as_dereference();
+ break;
+ } else {
+ /* Whether or not the field is row-major (because it might be a
+ * bvec2 or something) does not affect the array itself. We need
+ * to know whether an array element in its entirety is row-major.
+ */
+ const bool array_row_major =
+ is_dereferenced_thing_row_major(deref_array);
+
+ /* The array type will give the correct interface packing
+ * information
+ */
+ if (packing == GLSL_INTERFACE_PACKING_STD430) {
+ array_stride = deref_array->type->std430_array_stride(array_row_major);
+ } else {
+ array_stride = deref_array->type->std140_size(array_row_major);
+ array_stride = glsl_align(array_stride, 16);
+ }
+ }
+
+ ir_rvalue *array_index = deref_array->array_index;
+ if (array_index->type->base_type == GLSL_TYPE_INT)
+ array_index = i2u(array_index);
+
+ ir_constant *const_index =
+ array_index->constant_expression_value(NULL);
+ if (const_index) {
+ *const_offset += array_stride * const_index->value.u[0];
+ } else {
+ *offset = add(*offset,
+ mul(array_index,
+ new(mem_ctx) ir_constant(array_stride)));
+ }
+ deref = deref_array->array->as_dereference();
+ break;
+ }
+
+ case ir_type_dereference_record: {
+ ir_dereference_record *deref_record = (ir_dereference_record *) deref;
+ const glsl_type *struct_type = deref_record->record->type;
+ unsigned intra_struct_offset = 0;
+
+ for (unsigned int i = 0; i < struct_type->length; i++) {
+ const glsl_type *type = struct_type->fields.structure[i].type;
+
+ ir_dereference_record *field_deref = new(mem_ctx)
+ ir_dereference_record(deref_record->record,
+ struct_type->fields.structure[i].name);
+ const bool field_row_major =
+ is_dereferenced_thing_row_major(field_deref);
+
+ ralloc_free(field_deref);
+
+ unsigned field_align = 0;
+
+ if (packing == GLSL_INTERFACE_PACKING_STD430)
+ field_align = type->std430_base_alignment(field_row_major);
+ else
+ field_align = type->std140_base_alignment(field_row_major);
+
+ intra_struct_offset = glsl_align(intra_struct_offset, field_align);
+
+ if (strcmp(struct_type->fields.structure[i].name,
+ deref_record->field) == 0)
+ break;
+
+ if (packing == GLSL_INTERFACE_PACKING_STD430)
+ intra_struct_offset += type->std430_size(field_row_major);
+ else
+ intra_struct_offset += type->std140_size(field_row_major);
+
+ /* If the field just examined was itself a structure, apply rule
+ * #9:
+ *
+ * "The structure may have padding at the end; the base offset
+ * of the member following the sub-structure is rounded up to
+ * the next multiple of the base alignment of the structure."
+ */
+ if (type->without_array()->is_record()) {
+ intra_struct_offset = glsl_align(intra_struct_offset,
+ field_align);
+ }
+ }
+
+ *const_offset += intra_struct_offset;
+ deref = deref_record->record->as_dereference();
+ break;
+ }
+
+ case ir_type_swizzle: {
+ ir_swizzle *deref_swizzle = (ir_swizzle *) deref;
+
+ assert(deref_swizzle->mask.num_components == 1);
+
+ *const_offset += deref_swizzle->mask.x * sizeof(int);
+ deref = deref_swizzle->val->as_dereference();
+ break;
+ }
+
+ default:
+ assert(!"not reached");
+ deref = NULL;
+ break;
+ }
+ }
+}
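
/* Worked example, editorial; std140 packing assumed. For a block member
 * declared as "uniform U { vec4 arr[4]; };" (no instance name, so "arr" is
 * itself the ir_variable), the access arr[i].y with a non-constant i visits
 * the derefs outside-in:
 *    swizzle .y            *const_offset += 1 * sizeof(int)        (+4)
 *    array deref, index i  stride = glsl_align(std140_size(vec4), 16) = 16,
 *                          *offset = add(*offset, mul(i, 16))
 *    variable deref        loop terminates
 * The final address is the block base plus *const_offset plus *offset.
 */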
+
+} /* namespace lower_buffer_access */
diff --git a/src/glsl/lower_buffer_access.h b/src/glsl/lower_buffer_access.h
new file mode 100644
index 00000000000..cc4614e9792
--- /dev/null
+++ b/src/glsl/lower_buffer_access.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_buffer_access.h
+ *
+ * Helper for IR lowering pass to replace dereferences of buffer object based
+ * shader variables with intrinsic function calls.
+ *
+ * This helper is used by lowering passes for UBOs, SSBOs and compute shader
+ * shared variables.
+ */
+
+#pragma once
+#ifndef LOWER_BUFFER_ACCESS_H
+#define LOWER_BUFFER_ACCESS_H
+
+#include "ir.h"
+#include "ir_rvalue_visitor.h"
+
+namespace lower_buffer_access {
+
+class lower_buffer_access : public ir_rvalue_enter_visitor {
+public:
+ virtual void
+ insert_buffer_access(void *mem_ctx, ir_dereference *deref,
+ const glsl_type *type, ir_rvalue *offset,
+ unsigned mask, int channel) = 0;
+
+ void emit_access(void *mem_ctx, bool is_write, ir_dereference *deref,
+ ir_variable *base_offset, unsigned int deref_offset,
+ bool row_major, int matrix_columns,
+ unsigned int packing, unsigned int write_mask);
+
+ bool is_dereferenced_thing_row_major(const ir_rvalue *deref);
+
+ void setup_buffer_access(void *mem_ctx, ir_variable *var, ir_rvalue *deref,
+ ir_rvalue **offset, unsigned *const_offset,
+ bool *row_major, int *matrix_columns,
+ unsigned packing);
+};
+
+} /* namespace lower_buffer_access */
+
+#endif /* LOWER_BUFFER_ACCESS_H */
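
A minimal subclass sketch (editorial, assuming only what the header above
declares): a concrete pass derives from the helper and supplies just the
intrinsic emission, while emit_access and setup_buffer_access do the layout
math. The lowering passes that follow are the real implementations.

class example_lowering_pass : public lower_buffer_access::lower_buffer_access {
public:
   virtual void
   insert_buffer_access(void *mem_ctx, ir_dereference *deref,
                        const glsl_type *type, ir_rvalue *offset,
                        unsigned mask, int channel)
   {
      /* Emit a backend-specific load or store of `type` at `offset`;
       * `mask` and `channel` select components of vector accesses. */
   }
};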
diff --git a/src/glsl/lower_named_interface_blocks.cpp b/src/glsl/lower_named_interface_blocks.cpp
index 114bb5811b4..f29eba4f75f 100644
--- a/src/glsl/lower_named_interface_blocks.cpp
+++ b/src/glsl/lower_named_interface_blocks.cpp
@@ -187,6 +187,7 @@ flatten_named_interface_blocks_declarations::run(exec_list *instructions)
new_var->data.sample = iface_t->fields.structure[i].sample;
new_var->data.patch = iface_t->fields.structure[i].patch;
new_var->data.stream = var->data.stream;
+ new_var->data.how_declared = var->data.how_declared;
new_var->init_interface_type(iface_t);
hash_table_insert(interface_namespace, new_var,
diff --git a/src/glsl/lower_packed_varyings.cpp b/src/glsl/lower_packed_varyings.cpp
index 037c27d88ab..8d1eb1725d5 100644
--- a/src/glsl/lower_packed_varyings.cpp
+++ b/src/glsl/lower_packed_varyings.cpp
@@ -622,6 +622,7 @@ lower_packed_varyings_visitor::get_packed_varying_deref(
packed_var->data.interpolation = unpacked_var->data.interpolation;
packed_var->data.location = location;
packed_var->data.precision = unpacked_var->data.precision;
+ packed_var->data.always_active_io = unpacked_var->data.always_active_io;
unpacked_var->insert_before(packed_var);
this->packed_varyings[slot] = packed_var;
} else {
diff --git a/src/glsl/lower_shared_reference.cpp b/src/glsl/lower_shared_reference.cpp
new file mode 100644
index 00000000000..533cd9202f4
--- /dev/null
+++ b/src/glsl/lower_shared_reference.cpp
@@ -0,0 +1,496 @@
+/*
+ * Copyright (c) 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_shared_reference.cpp
+ *
+ * IR lower pass to replace dereferences of compute shader shared variables
+ * with intrinsic function calls.
+ *
+ * This relieves drivers of the responsibility of allocating space for the
+ * shared variables in the shared memory region.
+ */
+
+#include "lower_buffer_access.h"
+#include "ir_builder.h"
+#include "main/macros.h"
+#include "util/list.h"
+#include "glsl_parser_extras.h"
+
+using namespace ir_builder;
+
+namespace {
+
+struct var_offset {
+ struct list_head node;
+ const ir_variable *var;
+ unsigned offset;
+};
+
+class lower_shared_reference_visitor :
+ public lower_buffer_access::lower_buffer_access {
+public:
+
+ lower_shared_reference_visitor(struct gl_shader *shader)
+ : list_ctx(ralloc_context(NULL)), shader(shader), shared_size(0u)
+ {
+ list_inithead(&var_offsets);
+ }
+
+ ~lower_shared_reference_visitor()
+ {
+ ralloc_free(list_ctx);
+ }
+
+ enum {
+ shared_load_access,
+ shared_store_access,
+ shared_atomic_access,
+ } buffer_access_type;
+
+ void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
+ const glsl_type *type, ir_rvalue *offset,
+ unsigned mask, int channel);
+
+ void handle_rvalue(ir_rvalue **rvalue);
+ ir_visitor_status visit_enter(ir_assignment *ir);
+ void handle_assignment(ir_assignment *ir);
+
+ ir_call *lower_shared_atomic_intrinsic(ir_call *ir);
+ ir_call *check_for_shared_atomic_intrinsic(ir_call *ir);
+ ir_visitor_status visit_enter(ir_call *ir);
+
+ unsigned get_shared_offset(const ir_variable *);
+
+ ir_call *shared_load(void *mem_ctx, const struct glsl_type *type,
+ ir_rvalue *offset);
+ ir_call *shared_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
+ unsigned write_mask);
+
+ void *list_ctx;
+ struct gl_shader *shader;
+ struct list_head var_offsets;
+ unsigned shared_size;
+ bool progress;
+};
+
+unsigned
+lower_shared_reference_visitor::get_shared_offset(const ir_variable *var)
+{
+ list_for_each_entry(var_offset, var_entry, &var_offsets, node) {
+ if (var_entry->var == var)
+ return var_entry->offset;
+ }
+
+ struct var_offset *new_entry = rzalloc(list_ctx, struct var_offset);
+ list_add(&new_entry->node, &var_offsets);
+ new_entry->var = var;
+
+ unsigned var_align = var->type->std430_base_alignment(false);
+ new_entry->offset = glsl_align(shared_size, var_align);
+
+ unsigned var_size = var->type->std430_size(false);
+ shared_size = new_entry->offset + var_size;
+
+ return new_entry->offset;
+}
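
/* Worked allocation, editorial. Given, in declaration order,
 *    shared float a;  shared vec3 b;  shared float c;
 * std430 base alignments are 4/16/4 and sizes 4/12/4, so successive calls
 * yield:
 *    a -> offset 0    (shared_size becomes 4)
 *    b -> offset 16   (shared_size becomes 28)
 *    c -> offset 28   (shared_size becomes 32)
 * The "false" arguments mean column-major: shared variables carry no
 * user-specified layout, so plain std430 rules apply.
 */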
+
+void
+lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+ if (!*rvalue)
+ return;
+
+ ir_dereference *deref = (*rvalue)->as_dereference();
+ if (!deref)
+ return;
+
+ ir_variable *var = deref->variable_referenced();
+ if (!var || var->data.mode != ir_var_shader_shared)
+ return;
+
+ buffer_access_type = shared_load_access;
+
+ void *mem_ctx = ralloc_parent(shader->ir);
+
+ ir_rvalue *offset = NULL;
+ unsigned const_offset = get_shared_offset(var);
+ bool row_major;
+ int matrix_columns;
+ assert(var->get_interface_type() == NULL);
+ const unsigned packing = GLSL_INTERFACE_PACKING_STD430;
+
+ setup_buffer_access(mem_ctx, var, deref,
+ &offset, &const_offset,
+ &row_major, &matrix_columns, packing);
+
+ /* Now that we've calculated the offset to the start of the
+ * dereference, walk over the type and emit loads into a temporary.
+ */
+ const glsl_type *type = (*rvalue)->type;
+ ir_variable *load_var = new(mem_ctx) ir_variable(type,
+ "shared_load_temp",
+ ir_var_temporary);
+ base_ir->insert_before(load_var);
+
+ ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
+ "shared_load_temp_offset",
+ ir_var_temporary);
+ base_ir->insert_before(load_offset);
+ base_ir->insert_before(assign(load_offset, offset));
+
+ deref = new(mem_ctx) ir_dereference_variable(load_var);
+
+ emit_access(mem_ctx, false, deref, load_offset, const_offset, row_major,
+ matrix_columns, packing, 0);
+
+ *rvalue = deref;
+
+ progress = true;
+}
+
+void
+lower_shared_reference_visitor::handle_assignment(ir_assignment *ir)
+{
+ if (!ir || !ir->lhs)
+ return;
+
+ ir_rvalue *rvalue = ir->lhs->as_rvalue();
+ if (!rvalue)
+ return;
+
+ ir_dereference *deref = ir->lhs->as_dereference();
+ if (!deref)
+ return;
+
+ ir_variable *var = ir->lhs->variable_referenced();
+ if (!var || var->data.mode != ir_var_shader_shared)
+ return;
+
+ buffer_access_type = shared_store_access;
+
+ /* We have a write to a shared variable, so declare a temporary and rewrite
+ * the assignment so that the temporary is the LHS.
+ */
+ void *mem_ctx = ralloc_parent(shader->ir);
+
+ const glsl_type *type = rvalue->type;
+ ir_variable *store_var = new(mem_ctx) ir_variable(type,
+ "shared_store_temp",
+ ir_var_temporary);
+ base_ir->insert_before(store_var);
+ ir->lhs = new(mem_ctx) ir_dereference_variable(store_var);
+
+ ir_rvalue *offset = NULL;
+ unsigned const_offset = get_shared_offset(var);
+ bool row_major;
+ int matrix_columns;
+ assert(var->get_interface_type() == NULL);
+ const unsigned packing = GLSL_INTERFACE_PACKING_STD430;
+
+ setup_buffer_access(mem_ctx, var, deref,
+ &offset, &const_offset,
+ &row_major, &matrix_columns, packing);
+
+ deref = new(mem_ctx) ir_dereference_variable(store_var);
+
+ ir_variable *store_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
+ "shared_store_temp_offset",
+ ir_var_temporary);
+ base_ir->insert_before(store_offset);
+ base_ir->insert_before(assign(store_offset, offset));
+
+ /* Now we have to write the value assigned to the temporary back to memory */
+ emit_access(mem_ctx, true, deref, store_offset, const_offset, row_major,
+ matrix_columns, packing, ir->write_mask);
+
+ progress = true;
+}
+
+ir_visitor_status
+lower_shared_reference_visitor::visit_enter(ir_assignment *ir)
+{
+ handle_assignment(ir);
+ return rvalue_visit(ir);
+}
+
+void
+lower_shared_reference_visitor::insert_buffer_access(void *mem_ctx,
+ ir_dereference *deref,
+ const glsl_type *type,
+ ir_rvalue *offset,
+ unsigned mask,
+ int channel)
+{
+ if (buffer_access_type == shared_store_access) {
+ ir_call *store = shared_store(mem_ctx, deref, offset, mask);
+ base_ir->insert_after(store);
+ } else {
+ ir_call *load = shared_load(mem_ctx, type, offset);
+ base_ir->insert_before(load);
+ ir_rvalue *value = load->return_deref->as_rvalue()->clone(mem_ctx, NULL);
+ base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
+ value));
+ }
+}
+
+static bool
+compute_shader_enabled(const _mesa_glsl_parse_state *state)
+{
+ return state->stage == MESA_SHADER_COMPUTE;
+}
+
+ir_call *
+lower_shared_reference_visitor::shared_store(void *mem_ctx,
+ ir_rvalue *deref,
+ ir_rvalue *offset,
+ unsigned write_mask)
+{
+ exec_list sig_params;
+
+ ir_variable *offset_ref = new(mem_ctx)
+ ir_variable(glsl_type::uint_type, "offset", ir_var_function_in);
+ sig_params.push_tail(offset_ref);
+
+ ir_variable *val_ref = new(mem_ctx)
+ ir_variable(deref->type, "value", ir_var_function_in);
+ sig_params.push_tail(val_ref);
+
+ ir_variable *writemask_ref = new(mem_ctx)
+ ir_variable(glsl_type::uint_type, "write_mask", ir_var_function_in);
+ sig_params.push_tail(writemask_ref);
+
+ ir_function_signature *sig = new(mem_ctx)
+ ir_function_signature(glsl_type::void_type, compute_shader_enabled);
+ assert(sig);
+ sig->replace_parameters(&sig_params);
+ sig->is_intrinsic = true;
+
+ ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_shared");
+ f->add_signature(sig);
+
+ exec_list call_params;
+ call_params.push_tail(offset->clone(mem_ctx, NULL));
+ call_params.push_tail(deref->clone(mem_ctx, NULL));
+ call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
+ return new(mem_ctx) ir_call(sig, NULL, &call_params);
+}
+
+ir_call *
+lower_shared_reference_visitor::shared_load(void *mem_ctx,
+ const struct glsl_type *type,
+ ir_rvalue *offset)
+{
+ exec_list sig_params;
+
+ ir_variable *offset_ref = new(mem_ctx)
+ ir_variable(glsl_type::uint_type, "offset_ref", ir_var_function_in);
+ sig_params.push_tail(offset_ref);
+
+ ir_function_signature *sig =
+ new(mem_ctx) ir_function_signature(type, compute_shader_enabled);
+ assert(sig);
+ sig->replace_parameters(&sig_params);
+ sig->is_intrinsic = true;
+
+ ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_shared");
+ f->add_signature(sig);
+
+ ir_variable *result = new(mem_ctx)
+ ir_variable(type, "shared_load_result", ir_var_temporary);
+ base_ir->insert_before(result);
+ ir_dereference_variable *deref_result = new(mem_ctx)
+ ir_dereference_variable(result);
+
+ exec_list call_params;
+ call_params.push_tail(offset->clone(mem_ctx, NULL));
+
+ return new(mem_ctx) ir_call(sig, deref_result, &call_params);
+}
+
+/* Lowers the intrinsic call to a new internal intrinsic that replaces the
+ * shared-variable access in the first parameter with an offset. This involves
+ * creating the new internal intrinsic (i.e. the new function signature).
+ */
+ir_call *
+lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir)
+{
+ /* Shared atomics usually have 2 parameters: the shared variable and an
+ * integer argument. The exception is atomicCompSwap, which takes an
+ * additional integer parameter.
+ */
+ int param_count = ir->actual_parameters.length();
+ assert(param_count == 2 || param_count == 3);
+
+ /* First argument must be a scalar integer shared variable */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_instruction *inst = (ir_instruction *) param;
+ assert(inst->ir_type == ir_type_dereference_variable ||
+ inst->ir_type == ir_type_dereference_array ||
+ inst->ir_type == ir_type_dereference_record ||
+ inst->ir_type == ir_type_swizzle);
+
+ ir_rvalue *deref = (ir_rvalue *) inst;
+ assert(deref->type->is_scalar() && deref->type->is_integer());
+
+ ir_variable *var = deref->variable_referenced();
+ assert(var);
+
+ /* Compute the offset to the start of the dereference. */
+ void *mem_ctx = ralloc_parent(shader->ir);
+
+ ir_rvalue *offset = NULL;
+ unsigned const_offset = get_shared_offset(var);
+ bool row_major;
+ int matrix_columns;
+ assert(var->get_interface_type() == NULL);
+ const unsigned packing = GLSL_INTERFACE_PACKING_STD430;
+ buffer_access_type = shared_atomic_access;
+
+ setup_buffer_access(mem_ctx, var, deref,
+ &offset, &const_offset,
+ &row_major, &matrix_columns, packing);
+
+ assert(offset);
+ assert(!row_major);
+ assert(matrix_columns == 1);
+
+ ir_rvalue *deref_offset =
+ add(offset, new(mem_ctx) ir_constant(const_offset));
+
+ /* Create the new internal function signature that will take an offset
+ * instead of a shared variable.
+ */
+ exec_list sig_params;
+ ir_variable *sig_param = new(mem_ctx)
+ ir_variable(glsl_type::uint_type, "offset", ir_var_function_in);
+ sig_params.push_tail(sig_param);
+
+ const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
+ glsl_type::int_type : glsl_type::uint_type;
+ sig_param = new(mem_ctx)
+ ir_variable(type, "data1", ir_var_function_in);
+ sig_params.push_tail(sig_param);
+
+ if (param_count == 3) {
+ sig_param = new(mem_ctx)
+ ir_variable(type, "data2", ir_var_function_in);
+ sig_params.push_tail(sig_param);
+ }
+
+ ir_function_signature *sig =
+ new(mem_ctx) ir_function_signature(deref->type,
+ compute_shader_enabled);
+ assert(sig);
+ sig->replace_parameters(&sig_params);
+ sig->is_intrinsic = true;
+
+ char func_name[64];
+ sprintf(func_name, "%s_shared", ir->callee_name());
+ ir_function *f = new(mem_ctx) ir_function(func_name);
+ f->add_signature(sig);
+
+ /* Now, create the call to the internal intrinsic */
+ exec_list call_params;
+ call_params.push_tail(deref_offset);
+ param = ir->actual_parameters.get_head()->get_next();
+ ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
+ call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
+ if (param_count == 3) {
+ param = param->get_next();
+ param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
+ call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
+ }
+ ir_dereference_variable *return_deref =
+ ir->return_deref->clone(mem_ctx, NULL);
+ return new(mem_ctx) ir_call(sig, return_deref, &call_params);
+}
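
/* Editorial before/after sketch, shown as pseudo-IR. For
 *    shared uint counter;
 * a call lowered by the function above turns
 *    __intrinsic_atomic_add(counter, 1u)
 * into
 *    __intrinsic_atomic_add_shared(<offset of counter>, 1u)
 * where the first parameter is now a uint byte offset into the shared
 * region (get_shared_offset() plus any dynamic part of the dereference).
 */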
+
+ir_call *
+lower_shared_reference_visitor::check_for_shared_atomic_intrinsic(ir_call *ir)
+{
+ exec_list& params = ir->actual_parameters;
+
+ if (params.length() < 2 || params.length() > 3)
+ return ir;
+
+ ir_rvalue *rvalue =
+ ((ir_instruction *) params.get_head())->as_rvalue();
+ if (!rvalue)
+ return ir;
+
+ ir_variable *var = rvalue->variable_referenced();
+ if (!var || var->data.mode != ir_var_shader_shared)
+ return ir;
+
+ const char *callee = ir->callee_name();
+ if (!strcmp("__intrinsic_atomic_add", callee) ||
+ !strcmp("__intrinsic_atomic_min", callee) ||
+ !strcmp("__intrinsic_atomic_max", callee) ||
+ !strcmp("__intrinsic_atomic_and", callee) ||
+ !strcmp("__intrinsic_atomic_or", callee) ||
+ !strcmp("__intrinsic_atomic_xor", callee) ||
+ !strcmp("__intrinsic_atomic_exchange", callee) ||
+ !strcmp("__intrinsic_atomic_comp_swap", callee)) {
+ return lower_shared_atomic_intrinsic(ir);
+ }
+
+ return ir;
+}
+
+ir_visitor_status
+lower_shared_reference_visitor::visit_enter(ir_call *ir)
+{
+ ir_call *new_ir = check_for_shared_atomic_intrinsic(ir);
+ if (new_ir != ir) {
+ progress = true;
+ base_ir->replace_with(new_ir);
+ return visit_continue_with_parent;
+ }
+
+ return rvalue_visit(ir);
+}
+
+} /* unnamed namespace */
+
+void
+lower_shared_reference(struct gl_shader *shader, unsigned *shared_size)
+{
+ if (shader->Stage != MESA_SHADER_COMPUTE)
+ return;
+
+ lower_shared_reference_visitor v(shader);
+
+ /* Loop over the instructions lowering references, because taking a deref
+ * of a shared variable array using a shared variable dereference as the
+ * index will produce a collection of instructions, all of which have
+ * cloned shared variable dereferences for that array index.
+ */
+ do {
+ v.progress = false;
+ visit_list_elements(&v, shader->ir);
+ } while (v.progress);
+
+ *shared_size = v.shared_size;
+}
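
/* Minimal usage sketch, editorial; the linker-side field name is
 * illustrative. A driver runs the pass once per compute shader and uses
 * the reported size to allocate the workgroup's shared memory:
 *
 *    unsigned shared_size = 0;
 *    lower_shared_reference(shader, &shared_size);  // no-op for other stages
 *    prog->Comp.SharedSize = shared_size;
 */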
diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index b74aa3d0630..a172054bac8 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -33,106 +33,16 @@
* their own.
*/
-#include "ir.h"
+#include "lower_buffer_access.h"
#include "ir_builder.h"
-#include "ir_rvalue_visitor.h"
#include "main/macros.h"
#include "glsl_parser_extras.h"
using namespace ir_builder;
-/**
- * Determine if a thing being dereferenced is row-major
- *
- * There is some trickery here.
- *
- * If the thing being dereferenced is a member of uniform block \b without an
- * instance name, then the name of the \c ir_variable is the field name of an
- * interface type. If this field is row-major, then the thing referenced is
- * row-major.
- *
- * If the thing being dereferenced is a member of uniform block \b with an
- * instance name, then the last dereference in the tree will be an
- * \c ir_dereference_record. If that record field is row-major, then the
- * thing referenced is row-major.
- */
-static bool
-is_dereferenced_thing_row_major(const ir_rvalue *deref)
-{
- bool matrix = false;
- const ir_rvalue *ir = deref;
-
- while (true) {
- matrix = matrix || ir->type->without_array()->is_matrix();
-
- switch (ir->ir_type) {
- case ir_type_dereference_array: {
- const ir_dereference_array *const array_deref =
- (const ir_dereference_array *) ir;
-
- ir = array_deref->array;
- break;
- }
-
- case ir_type_dereference_record: {
- const ir_dereference_record *const record_deref =
- (const ir_dereference_record *) ir;
-
- ir = record_deref->record;
-
- const int idx = ir->type->field_index(record_deref->field);
- assert(idx >= 0);
-
- const enum glsl_matrix_layout matrix_layout =
- glsl_matrix_layout(ir->type->fields.structure[idx].matrix_layout);
-
- switch (matrix_layout) {
- case GLSL_MATRIX_LAYOUT_INHERITED:
- break;
- case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR:
- return false;
- case GLSL_MATRIX_LAYOUT_ROW_MAJOR:
- return matrix || deref->type->without_array()->is_record();
- }
-
- break;
- }
-
- case ir_type_dereference_variable: {
- const ir_dereference_variable *const var_deref =
- (const ir_dereference_variable *) ir;
-
- const enum glsl_matrix_layout matrix_layout =
- glsl_matrix_layout(var_deref->var->data.matrix_layout);
-
- switch (matrix_layout) {
- case GLSL_MATRIX_LAYOUT_INHERITED:
- assert(!matrix);
- return false;
- case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR:
- return false;
- case GLSL_MATRIX_LAYOUT_ROW_MAJOR:
- return matrix || deref->type->without_array()->is_record();
- }
-
- unreachable("invalid matrix layout");
- break;
- }
-
- default:
- return false;
- }
- }
-
- /* The tree must have ended with a dereference that wasn't an
- * ir_dereference_variable. That is invalid, and it should be impossible.
- */
- unreachable("invalid dereference tree");
- return false;
-}
-
namespace {
-class lower_ubo_reference_visitor : public ir_rvalue_enter_visitor {
+class lower_ubo_reference_visitor :
+ public lower_buffer_access::lower_buffer_access {
public:
lower_ubo_reference_visitor(struct gl_shader *shader)
: shader(shader)
@@ -142,30 +52,38 @@ public:
void handle_rvalue(ir_rvalue **rvalue);
ir_visitor_status visit_enter(ir_assignment *ir);
- void setup_for_load_or_store(ir_variable *var,
+ void setup_for_load_or_store(void *mem_ctx,
+ ir_variable *var,
ir_rvalue *deref,
ir_rvalue **offset,
unsigned *const_offset,
bool *row_major,
int *matrix_columns,
unsigned packing);
- ir_expression *ubo_load(const struct glsl_type *type,
+ ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type,
ir_rvalue *offset);
- ir_call *ssbo_load(const struct glsl_type *type,
+ ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type,
ir_rvalue *offset);
+ bool check_for_buffer_array_copy(ir_assignment *ir);
+ bool check_for_buffer_struct_copy(ir_assignment *ir);
void check_for_ssbo_store(ir_assignment *ir);
- void write_to_memory(ir_dereference *deref,
- ir_variable *var,
- ir_variable *write_var,
- unsigned write_mask);
- ir_call *ssbo_store(ir_rvalue *deref, ir_rvalue *offset,
+ void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var,
+ ir_variable *write_var, unsigned write_mask);
+ ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
unsigned write_mask);
- void emit_access(bool is_write, ir_dereference *deref,
- ir_variable *base_offset, unsigned int deref_offset,
- bool row_major, int matrix_columns,
- unsigned packing, unsigned write_mask);
+ enum {
+ ubo_load_access,
+ ssbo_load_access,
+ ssbo_store_access,
+ ssbo_unsized_array_length_access,
+ ssbo_atomic_access,
+ } buffer_access_type;
+
+ void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
+ const glsl_type *type, ir_rvalue *offset,
+ unsigned mask, int channel);
ir_visitor_status visit_enter(class ir_expression *);
ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
@@ -175,7 +93,7 @@ public:
ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
ir_dereference *,
ir_variable *);
- ir_expression *emit_ssbo_get_buffer_size();
+ ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx);
unsigned calculate_unsized_array_stride(ir_dereference *deref,
unsigned packing);
@@ -184,12 +102,10 @@ public:
ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
ir_visitor_status visit_enter(ir_call *ir);
- void *mem_ctx;
struct gl_shader *shader;
struct gl_uniform_buffer_variable *ubo_var;
ir_rvalue *uniform_block;
bool progress;
- bool is_shader_storage;
};
/**
@@ -324,7 +240,8 @@ interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
}
void
-lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
+lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
+ ir_variable *var,
ir_rvalue *deref,
ir_rvalue **offset,
unsigned *const_offset,
@@ -339,10 +256,9 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
deref, &nonconst_block_index);
/* Locate the block by interface name */
- this->is_shader_storage = var->is_in_shader_storage_block();
unsigned num_blocks;
struct gl_uniform_block **blocks;
- if (this->is_shader_storage) {
+ if (this->buffer_access_type != ubo_load_access) {
num_blocks = shader->NumShaderStorageBlocks;
blocks = shader->ShaderStorageBlocks;
} else {
@@ -370,164 +286,10 @@ lower_ubo_reference_visitor::setup_for_load_or_store(ir_variable *var,
assert(this->uniform_block);
- *offset = new(mem_ctx) ir_constant(0u);
- *const_offset = 0;
- *row_major = is_dereferenced_thing_row_major(deref);
- *matrix_columns = 1;
+ *const_offset = ubo_var->Offset;
- /* Calculate the offset to the start of the region of the UBO
- * dereferenced by *rvalue. This may be a variable offset if an
- * array dereference has a variable index.
- */
- while (deref) {
- switch (deref->ir_type) {
- case ir_type_dereference_variable: {
- *const_offset += ubo_var->Offset;
- deref = NULL;
- break;
- }
-
- case ir_type_dereference_array: {
- ir_dereference_array *deref_array = (ir_dereference_array *) deref;
- unsigned array_stride;
- if (deref_array->array->type->is_vector()) {
- /* We get this when storing or loading a component out of a vector
- * with a non-constant index. This happens for v[i] = f where v is
- * a vector (or m[i][j] = f where m is a matrix). If we don't
- * lower that here, it gets turned into v = vector_insert(v, i,
- * f), which loads the entire vector, modifies one component and
- * then write the entire thing back. That breaks if another
- * thread or SIMD channel is modifying the same vector.
- */
- array_stride = 4;
- if (deref_array->array->type->is_double())
- array_stride *= 2;
- } else if (deref_array->array->type->is_matrix() && *row_major) {
- /* When loading a vector out of a row major matrix, the
- * step between the columns (vectors) is the size of a
- * float, while the step between the rows (elements of a
- * vector) is handled below in emit_ubo_loads.
- */
- array_stride = 4;
- if (deref_array->array->type->is_double())
- array_stride *= 2;
- *matrix_columns = deref_array->array->type->matrix_columns;
- } else if (deref_array->type->without_array()->is_interface()) {
- /* We're processing an array dereference of an interface instance
- * array. The thing being dereferenced *must* be a variable
- * dereference because interfaces cannot be embedded in other
- * types. In terms of calculating the offsets for the lowering
- * pass, we don't care about the array index. All elements of an
- * interface instance array will have the same offsets relative to
- * the base of the block that backs them.
- */
- deref = deref_array->array->as_dereference();
- break;
- } else {
- /* Whether or not the field is row-major (because it might be a
- * bvec2 or something) does not affect the array itself. We need
- * to know whether an array element in its entirety is row-major.
- */
- const bool array_row_major =
- is_dereferenced_thing_row_major(deref_array);
-
- /* The array type will give the correct interface packing
- * information
- */
- if (packing == GLSL_INTERFACE_PACKING_STD430) {
- array_stride = deref_array->type->std430_array_stride(array_row_major);
- } else {
- array_stride = deref_array->type->std140_size(array_row_major);
- array_stride = glsl_align(array_stride, 16);
- }
- }
-
- ir_rvalue *array_index = deref_array->array_index;
- if (array_index->type->base_type == GLSL_TYPE_INT)
- array_index = i2u(array_index);
-
- ir_constant *const_index =
- array_index->constant_expression_value(NULL);
- if (const_index) {
- *const_offset += array_stride * const_index->value.u[0];
- } else {
- *offset = add(*offset,
- mul(array_index,
- new(mem_ctx) ir_constant(array_stride)));
- }
- deref = deref_array->array->as_dereference();
- break;
- }
-
- case ir_type_dereference_record: {
- ir_dereference_record *deref_record = (ir_dereference_record *) deref;
- const glsl_type *struct_type = deref_record->record->type;
- unsigned intra_struct_offset = 0;
-
- for (unsigned int i = 0; i < struct_type->length; i++) {
- const glsl_type *type = struct_type->fields.structure[i].type;
-
- ir_dereference_record *field_deref = new(mem_ctx)
- ir_dereference_record(deref_record->record,
- struct_type->fields.structure[i].name);
- const bool field_row_major =
- is_dereferenced_thing_row_major(field_deref);
-
- ralloc_free(field_deref);
-
- unsigned field_align = 0;
-
- if (packing == GLSL_INTERFACE_PACKING_STD430)
- field_align = type->std430_base_alignment(field_row_major);
- else
- field_align = type->std140_base_alignment(field_row_major);
-
- intra_struct_offset = glsl_align(intra_struct_offset, field_align);
-
- if (strcmp(struct_type->fields.structure[i].name,
- deref_record->field) == 0)
- break;
-
- if (packing == GLSL_INTERFACE_PACKING_STD430)
- intra_struct_offset += type->std430_size(field_row_major);
- else
- intra_struct_offset += type->std140_size(field_row_major);
-
- /* If the field just examined was itself a structure, apply rule
- * #9:
- *
- * "The structure may have padding at the end; the base offset
- * of the member following the sub-structure is rounded up to
- * the next multiple of the base alignment of the structure."
- */
- if (type->without_array()->is_record()) {
- intra_struct_offset = glsl_align(intra_struct_offset,
- field_align);
-
- }
- }
-
- *const_offset += intra_struct_offset;
- deref = deref_record->record->as_dereference();
- break;
- }
-
- case ir_type_swizzle: {
- ir_swizzle *deref_swizzle = (ir_swizzle *) deref;
-
- assert(deref_swizzle->mask.num_components == 1);
-
- *const_offset += deref_swizzle->mask.x * sizeof(int);
- deref = deref_swizzle->val->as_dereference();
- break;
- }
-
- default:
- assert(!"not reached");
- deref = NULL;
- break;
- }
- }
+ setup_buffer_access(mem_ctx, var, deref, offset, const_offset, row_major,
+ matrix_columns, packing);
}
void
@@ -544,7 +306,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
if (!var || !var->is_in_buffer_block())
return;
- mem_ctx = ralloc_parent(shader->ir);
+ void *mem_ctx = ralloc_parent(shader->ir);
ir_rvalue *offset = NULL;
unsigned const_offset;
@@ -552,10 +314,14 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
int matrix_columns;
unsigned packing = var->get_interface_type()->interface_packing;
+ this->buffer_access_type =
+ var->is_in_shader_storage_block() ?
+ ssbo_load_access : ubo_load_access;
+
/* Compute the offset to the start of the dereference as well as other
* information we need to configure the write
*/
- setup_for_load_or_store(var, deref,
+ setup_for_load_or_store(mem_ctx, var, deref,
&offset, &const_offset,
&row_major, &matrix_columns,
packing);
@@ -577,7 +343,7 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
base_ir->insert_before(assign(load_offset, offset));
deref = new(mem_ctx) ir_dereference_variable(load_var);
- emit_access(false, deref, load_offset, const_offset,
+ emit_access(mem_ctx, false, deref, load_offset, const_offset,
row_major, matrix_columns, packing, 0);
*rvalue = deref;
@@ -585,7 +351,8 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
}
ir_expression *
-lower_ubo_reference_visitor::ubo_load(const glsl_type *type,
+lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
+ const glsl_type *type,
ir_rvalue *offset)
{
ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
@@ -604,7 +371,8 @@ shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
}
ir_call *
-lower_ubo_reference_visitor::ssbo_store(ir_rvalue *deref,
+lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
+ ir_rvalue *deref,
ir_rvalue *offset,
unsigned write_mask)
{
@@ -644,7 +412,8 @@ lower_ubo_reference_visitor::ssbo_store(ir_rvalue *deref,
}
ir_call *
-lower_ubo_reference_visitor::ssbo_load(const struct glsl_type *type,
+lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
+ const struct glsl_type *type,
ir_rvalue *offset)
{
exec_list sig_params;
@@ -679,208 +448,46 @@ lower_ubo_reference_visitor::ssbo_load(const struct glsl_type *type,
return new(mem_ctx) ir_call(sig, deref_result, &call_params);
}
-static inline int
-writemask_for_size(unsigned n)
-{
- return ((1 << n) - 1);
-}
-
-/**
- * Takes a deref and recursively calls itself to break the deref down to the
- * point that the reads or writes generated are contiguous scalars or vectors.
- */
void
-lower_ubo_reference_visitor::emit_access(bool is_write,
- ir_dereference *deref,
- ir_variable *base_offset,
- unsigned int deref_offset,
- bool row_major,
- int matrix_columns,
- unsigned packing,
- unsigned write_mask)
+lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx,
+ ir_dereference *deref,
+ const glsl_type *type,
+ ir_rvalue *offset,
+ unsigned mask,
+ int channel)
{
- if (deref->type->is_record()) {
- unsigned int field_offset = 0;
-
- for (unsigned i = 0; i < deref->type->length; i++) {
- const struct glsl_struct_field *field =
- &deref->type->fields.structure[i];
- ir_dereference *field_deref =
- new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL),
- field->name);
-
- field_offset =
- glsl_align(field_offset,
- field->type->std140_base_alignment(row_major));
-
- emit_access(is_write, field_deref, base_offset,
- deref_offset + field_offset,
- row_major, 1, packing,
- writemask_for_size(field_deref->type->vector_elements));
-
- field_offset += field->type->std140_size(row_major);
- }
- return;
- }
-
- if (deref->type->is_array()) {
- unsigned array_stride = packing == GLSL_INTERFACE_PACKING_STD430 ?
- deref->type->fields.array->std430_array_stride(row_major) :
- glsl_align(deref->type->fields.array->std140_size(row_major), 16);
-
- for (unsigned i = 0; i < deref->type->length; i++) {
- ir_constant *element = new(mem_ctx) ir_constant(i);
- ir_dereference *element_deref =
- new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL),
- element);
- emit_access(is_write, element_deref, base_offset,
- deref_offset + i * array_stride,
- row_major, 1, packing,
- writemask_for_size(element_deref->type->vector_elements));
- }
- return;
- }
-
- if (deref->type->is_matrix()) {
- for (unsigned i = 0; i < deref->type->matrix_columns; i++) {
- ir_constant *col = new(mem_ctx) ir_constant(i);
- ir_dereference *col_deref =
- new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), col);
-
- if (row_major) {
- /* For a row-major matrix, the next column starts at the next
- * element.
- */
- int size_mul = deref->type->is_double() ? 8 : 4;
- emit_access(is_write, col_deref, base_offset,
- deref_offset + i * size_mul,
- row_major, deref->type->matrix_columns, packing,
- writemask_for_size(col_deref->type->vector_elements));
- } else {
- int size_mul;
-
- /* std430 doesn't round up vec2 size to a vec4 size */
- if (packing == GLSL_INTERFACE_PACKING_STD430 &&
- deref->type->vector_elements == 2 &&
- !deref->type->is_double()) {
- size_mul = 8;
- } else {
- /* std140 always rounds the stride of arrays (and matrices) to a
- * vec4, so matrices are always 16 between columns/rows. With
- * doubles, they will be 32 apart when there are more than 2 rows.
- *
- * For both std140 and std430, if the member is a
- * three-'component vector with components consuming N basic
- * machine units, the base alignment is 4N. For vec4, base
- * alignment is 4N.
- */
- size_mul = (deref->type->is_double() &&
- deref->type->vector_elements > 2) ? 32 : 16;
- }
-
- emit_access(is_write, col_deref, base_offset,
- deref_offset + i * size_mul,
- row_major, deref->type->matrix_columns, packing,
- writemask_for_size(col_deref->type->vector_elements));
- }
- }
- return;
+ switch (this->buffer_access_type) {
+ case ubo_load_access:
+ base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
+ ubo_load(mem_ctx, type, offset),
+ mask));
+ break;
+ case ssbo_load_access: {
+ ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset);
+ base_ir->insert_before(load_ssbo);
+ ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
+ ir_assignment *assignment =
+ assign(deref->clone(mem_ctx, NULL), value, mask);
+ base_ir->insert_before(assignment);
+ break;
}
-
- assert(deref->type->is_scalar() || deref->type->is_vector());
-
- if (!row_major) {
- ir_rvalue *offset =
- add(base_offset, new(mem_ctx) ir_constant(deref_offset));
- if (is_write)
- base_ir->insert_after(ssbo_store(deref, offset, write_mask));
- else {
- if (!this->is_shader_storage) {
- base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
- ubo_load(deref->type, offset)));
- } else {
- ir_call *load_ssbo = ssbo_load(deref->type, offset);
- base_ir->insert_before(load_ssbo);
- ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
- base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), value));
- }
- }
- } else {
- unsigned N = deref->type->is_double() ? 8 : 4;
-
- /* We're dereffing a column out of a row-major matrix, so we
- * gather the vector from each stored row.
- */
- assert(deref->type->base_type == GLSL_TYPE_FLOAT ||
- deref->type->base_type == GLSL_TYPE_DOUBLE);
- /* Matrices, row_major or not, are stored as if they were
- * arrays of vectors of the appropriate size in std140.
- * Arrays have their strides rounded up to a vec4, so the
- * matrix stride is always 16. However a double matrix may either be 16
- * or 32 depending on the number of columns.
- */
- assert(matrix_columns <= 4);
- unsigned matrix_stride = 0;
- /* Matrix stride for std430 mat2xY matrices are not rounded up to
- * vec4 size. From OpenGL 4.3 spec, section 7.6.2.2 "Standard Uniform
- * Block Layout":
- *
- * "2. If the member is a two- or four-component vector with components
- * consuming N basic machine units, the base alignment is 2N or 4N,
- * respectively." [...]
- * "4. If the member is an array of scalars or vectors, the base alignment
- * and array stride are set to match the base alignment of a single array
- * element, according to rules (1), (2), and (3), and rounded up to the
- * base alignment of a vec4." [...]
- * "7. If the member is a row-major matrix with C columns and R rows, the
- * matrix is stored identically to an array of R row vectors with C
- * components each, according to rule (4)." [...]
- * "When using the std430 storage layout, shader storage blocks will be
- * laid out in buffer storage identically to uniform and shader storage
- * blocks using the std140 layout, except that the base alignment and
- * stride of arrays of scalars and vectors in rule 4 and of structures in
- * rule 9 are not rounded up a multiple of the base alignment of a vec4."
- */
- if (packing == GLSL_INTERFACE_PACKING_STD430 && matrix_columns == 2)
- matrix_stride = 2 * N;
- else
- matrix_stride = glsl_align(matrix_columns * N, 16);
-
- const glsl_type *deref_type = deref->type->base_type == GLSL_TYPE_FLOAT ?
- glsl_type::float_type : glsl_type::double_type;
-
- for (unsigned i = 0; i < deref->type->vector_elements; i++) {
- ir_rvalue *chan_offset =
- add(base_offset,
- new(mem_ctx) ir_constant(deref_offset + i * matrix_stride));
- if (is_write) {
- /* If the component is not in the writemask, then don't
- * store any value.
- */
- if (!((1 << i) & write_mask))
- continue;
-
- base_ir->insert_after(ssbo_store(swizzle(deref, i, 1), chan_offset, 1));
- } else {
- if (!this->is_shader_storage) {
- base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
- ubo_load(deref_type, chan_offset),
- (1U << i)));
- } else {
- ir_call *load_ssbo = ssbo_load(deref_type, chan_offset);
- base_ir->insert_before(load_ssbo);
- ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
- base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
- value,
- (1U << i)));
- }
- }
+ case ssbo_store_access:
+ if (channel >= 0) {
+ base_ir->insert_after(ssbo_store(mem_ctx,
+ swizzle(deref, channel, 1),
+ offset, 1));
+ } else {
+ base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask));
}
+ break;
+ default:
+ unreachable("invalid buffer_access_type in insert_buffer_access");
}
}
void
-lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref,
+lower_ubo_reference_visitor::write_to_memory(void *mem_ctx,
+ ir_dereference *deref,
ir_variable *var,
ir_variable *write_var,
unsigned write_mask)
@@ -891,10 +498,12 @@ lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref,
int matrix_columns;
unsigned packing = var->get_interface_type()->interface_packing;
+ this->buffer_access_type = ssbo_store_access;
+
/* Compute the offset to the start of the dereference as well as other
* information we need to configure the write
*/
- setup_for_load_or_store(var, deref,
+ setup_for_load_or_store(mem_ctx, var, deref,
&offset, &const_offset,
&row_major, &matrix_columns,
packing);
@@ -910,7 +519,7 @@ lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref,
base_ir->insert_before(assign(write_offset, offset));
deref = new(mem_ctx) ir_dereference_variable(write_var);
- emit_access(true, deref, write_offset, const_offset,
+ emit_access(mem_ctx, true, deref, write_offset, const_offset,
row_major, matrix_columns, packing, write_mask);
}
@@ -985,7 +594,7 @@ lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assig
}
ir_expression *
-lower_ubo_reference_visitor::emit_ssbo_get_buffer_size()
+lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx)
{
ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
return new(mem_ctx) ir_expression(ir_unop_get_buffer_size,
@@ -1059,7 +668,7 @@ lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalu
ir_dereference *deref,
ir_variable *var)
{
- mem_ctx = ralloc_parent(*rvalue);
+ void *mem_ctx = ralloc_parent(*rvalue);
ir_rvalue *base_offset = NULL;
unsigned const_offset;
@@ -1068,17 +677,19 @@ lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalu
unsigned packing = var->get_interface_type()->interface_packing;
int unsized_array_stride = calculate_unsized_array_stride(deref, packing);
+ this->buffer_access_type = ssbo_unsized_array_length_access;
+
/* Compute the offset to the start of the dereference as well as other
* information we need to calculate the length.
*/
- setup_for_load_or_store(var, deref,
+ setup_for_load_or_store(mem_ctx, var, deref,
&base_offset, &const_offset,
&row_major, &matrix_columns,
packing);
/* array.length() =
* max((buffer_object_size - offset_of_array) / stride_of_array, 0)
*/
- ir_expression *buffer_size = emit_ssbo_get_buffer_size();
+ ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx);
ir_expression *offset_of_array = new(mem_ctx)
ir_expression(ir_binop_add, base_offset,
@@ -1112,13 +723,13 @@ lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
return;
ir_variable *var = ir->lhs->variable_referenced();
- if (!var || !var->is_in_buffer_block())
+ if (!var || !var->is_in_shader_storage_block())
return;
/* We have a write to a buffer variable, so declare a temporary and rewrite
* the assignment so that the temporary is the LHS.
*/
- mem_ctx = ralloc_parent(shader->ir);
+ void *mem_ctx = ralloc_parent(shader->ir);
const glsl_type *type = rvalue->type;
ir_variable *write_var = new(mem_ctx) ir_variable(type,
@@ -1128,14 +739,131 @@ lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
ir->lhs = new(mem_ctx) ir_dereference_variable(write_var);
/* Now we have to write the value assigned to the temporary back to memory */
- write_to_memory(deref, var, write_var, ir->write_mask);
+ write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask);
progress = true;
}
+static bool
+is_buffer_backed_variable(ir_variable *var)
+{
+ return var->is_in_buffer_block() ||
+ var->data.mode == ir_var_shader_shared;
+}
+
+bool
+lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir)
+{
+ if (!ir || !ir->lhs || !ir->rhs)
+ return false;
+
+ /* LHS and RHS must be arrays
+ * FIXME: arrays of arrays?
+ */
+ if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array())
+ return false;
+
+ /* RHS must be a buffer-backed variable. This is what can cause the problem
+ * since it would lead to a series of loads that need to live until we
+ * see the writes to the LHS.
+ */
+ ir_variable *rhs_var = ir->rhs->variable_referenced();
+ if (!rhs_var || !is_buffer_backed_variable(rhs_var))
+ return false;
+
+ /* Split the array copy into individual element copies to reduce
+ * register pressure
+ */
+ ir_dereference *rhs_deref = ir->rhs->as_dereference();
+ if (!rhs_deref)
+ return false;
+
+ ir_dereference *lhs_deref = ir->lhs->as_dereference();
+ if (!lhs_deref)
+ return false;
+
+ assert(lhs_deref->type->length == rhs_deref->type->length);
+ void *mem_ctx = ralloc_parent(shader->ir);
+
+ for (unsigned i = 0; i < lhs_deref->type->length; i++) {
+ ir_dereference *lhs_i =
+ new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL),
+ new(mem_ctx) ir_constant(i));
+
+ ir_dereference *rhs_i =
+ new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL),
+ new(mem_ctx) ir_constant(i));
+ ir->insert_after(assign(lhs_i, rhs_i));
+ }
+
+ ir->remove();
+ progress = true;
+ return true;
+}
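
/* Editorial before/after sketch. With "arr" an SSBO-backed float[3], the
 * single assignment
 *    tmp = arr;
 * is rewritten by the function above into
 *    tmp[0] = arr[0];  tmp[1] = arr[1];  tmp[2] = arr[2];
 * so each load can be stored immediately instead of all three loads
 * staying live across the whole copy.
 */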
+
+bool
+lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir)
+{
+ if (!ir || !ir->lhs || !ir->rhs)
+ return false;
+
+ /* LHS and RHS must be records */
+ if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record())
+ return false;
+
+ /* RHS must be a buffer-backed variable. This is what can cause the problem
+ * since it would lead to a series of loads that need to live until we
+ * see the writes to the LHS.
+ */
+ ir_variable *rhs_var = ir->rhs->variable_referenced();
+ if (!rhs_var || !is_buffer_backed_variable(rhs_var))
+ return false;
+
+ /* Split the struct copy into individual element copies to reduce
+ * register pressure
+ */
+ ir_dereference *rhs_deref = ir->rhs->as_dereference();
+ if (!rhs_deref)
+ return false;
+
+ ir_dereference *lhs_deref = ir->lhs->as_dereference();
+ if (!lhs_deref)
+ return false;
+
+ assert(lhs_deref->type->record_compare(rhs_deref->type));
+ void *mem_ctx = ralloc_parent(shader->ir);
+
+ for (unsigned i = 0; i < lhs_deref->type->length; i++) {
+ const char *field_name = lhs_deref->type->fields.structure[i].name;
+ ir_dereference *lhs_field =
+ new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL),
+ field_name);
+ ir_dereference *rhs_field =
+ new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL),
+ field_name);
+ ir->insert_after(assign(lhs_field, rhs_field));
+ }
+
+ ir->remove();
+ progress = true;
+ return true;
+}
ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
{
+ /* Array and struct copies could involve large numbers of load/store
+ * operations. To reduce register pressure we want to special-case
+ * these and split them into individual element copies.
+ * This way we avoid emitting all the loads for the RHS first and
+ * all the writes for the LHS second, and register usage is more
+ * efficient.
+ */
+ if (check_for_buffer_array_copy(ir))
+ return visit_continue_with_parent;
+
+ if (check_for_buffer_struct_copy(ir))
+ return visit_continue_with_parent;
+
check_ssbo_unsized_array_length_assignment(ir);
check_for_ssbo_store(ir);
return rvalue_visit(ir);
@@ -1173,7 +901,7 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
/* Compute the offset to the start of the dereference and the
* block index
*/
- mem_ctx = ralloc_parent(shader->ir);
+ void *mem_ctx = ralloc_parent(shader->ir);
ir_rvalue *offset = NULL;
unsigned const_offset;
@@ -1181,7 +909,9 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
int matrix_columns;
unsigned packing = var->get_interface_type()->interface_packing;
- setup_for_load_or_store(var, deref,
+ this->buffer_access_type = ssbo_atomic_access;
+
+ setup_for_load_or_store(mem_ctx, var, deref,
&offset, &const_offset,
&row_major, &matrix_columns,
packing);
@@ -1225,7 +955,7 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
sig->is_intrinsic = true;
char func_name[64];
- sprintf(func_name, "%s_internal", ir->callee_name());
+ sprintf(func_name, "%s_ssbo", ir->callee_name());
ir_function *f = new(mem_ctx) ir_function(func_name);
f->add_signature(sig);
@@ -1249,15 +979,29 @@ lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
ir_call *
lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
{
+ exec_list& params = ir->actual_parameters;
+
+ if (params.length() < 2 || params.length() > 3)
+ return ir;
+
+ ir_rvalue *rvalue =
+ ((ir_instruction *) params.get_head())->as_rvalue();
+ if (!rvalue)
+ return ir;
+
+ ir_variable *var = rvalue->variable_referenced();
+ if (!var || !var->is_in_shader_storage_block())
+ return ir;
+
const char *callee = ir->callee_name();
- if (!strcmp("__intrinsic_ssbo_atomic_add", callee) ||
- !strcmp("__intrinsic_ssbo_atomic_min", callee) ||
- !strcmp("__intrinsic_ssbo_atomic_max", callee) ||
- !strcmp("__intrinsic_ssbo_atomic_and", callee) ||
- !strcmp("__intrinsic_ssbo_atomic_or", callee) ||
- !strcmp("__intrinsic_ssbo_atomic_xor", callee) ||
- !strcmp("__intrinsic_ssbo_atomic_exchange", callee) ||
- !strcmp("__intrinsic_ssbo_atomic_comp_swap", callee)) {
+ if (!strcmp("__intrinsic_atomic_add", callee) ||
+ !strcmp("__intrinsic_atomic_min", callee) ||
+ !strcmp("__intrinsic_atomic_max", callee) ||
+ !strcmp("__intrinsic_atomic_and", callee) ||
+ !strcmp("__intrinsic_atomic_or", callee) ||
+ !strcmp("__intrinsic_atomic_xor", callee) ||
+ !strcmp("__intrinsic_atomic_exchange", callee) ||
+ !strcmp("__intrinsic_atomic_comp_swap", callee)) {
return lower_ssbo_atomic_intrinsic(ir);
}
diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp
index 1ab3afecc7e..a1ba9345e32 100644
--- a/src/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/src/glsl/lower_variable_index_to_cond_assign.cpp
@@ -378,6 +378,9 @@ public:
case ir_var_shader_storage:
return this->lower_uniforms;
+ case ir_var_shader_shared:
+ return false;
+
case ir_var_function_in:
case ir_var_const_in:
return this->lower_temps;
diff --git a/src/glsl/nir/builtin_type_macros.h b/src/glsl/nir/builtin_type_macros.h
index 8e16ae45489..7bd2e4e6558 100644
--- a/src/glsl/nir/builtin_type_macros.h
+++ b/src/glsl/nir/builtin_type_macros.h
@@ -28,8 +28,6 @@
* language version or extension might provide them.
*/
-#include "glsl_types.h"
-
DECL_TYPE(error, GL_INVALID_ENUM, GLSL_TYPE_ERROR, 0, 0)
DECL_TYPE(void, GL_INVALID_ENUM, GLSL_TYPE_VOID, 0, 0)
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index a26300d1d26..9a25f2fc905 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -691,15 +691,15 @@ nir_visitor::visit(ir_call *ir)
op = nir_intrinsic_store_ssbo;
} else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) {
op = nir_intrinsic_load_ssbo;
- } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_add_internal") == 0) {
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_ssbo") == 0) {
op = nir_intrinsic_ssbo_atomic_add;
- } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_and_internal") == 0) {
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_ssbo") == 0) {
op = nir_intrinsic_ssbo_atomic_and;
- } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_or_internal") == 0) {
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_ssbo") == 0) {
op = nir_intrinsic_ssbo_atomic_or;
- } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_xor_internal") == 0) {
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_ssbo") == 0) {
op = nir_intrinsic_ssbo_atomic_xor;
- } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_min_internal") == 0) {
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_ssbo") == 0) {
assert(ir->return_deref);
if (ir->return_deref->type == glsl_type::int_type)
op = nir_intrinsic_ssbo_atomic_imin;
@@ -707,7 +707,7 @@ nir_visitor::visit(ir_call *ir)
op = nir_intrinsic_ssbo_atomic_umin;
else
unreachable("Invalid type");
- } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_max_internal") == 0) {
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_max_ssbo") == 0) {
assert(ir->return_deref);
if (ir->return_deref->type == glsl_type::int_type)
op = nir_intrinsic_ssbo_atomic_imax;
@@ -715,9 +715,9 @@ nir_visitor::visit(ir_call *ir)
op = nir_intrinsic_ssbo_atomic_umax;
else
unreachable("Invalid type");
- } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_exchange_internal") == 0) {
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_ssbo") == 0) {
op = nir_intrinsic_ssbo_atomic_exchange;
- } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_comp_swap_internal") == 0) {
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_ssbo") == 0) {
op = nir_intrinsic_ssbo_atomic_comp_swap;
} else if (strcmp(ir->callee_name(), "__intrinsic_shader_clock") == 0) {
op = nir_intrinsic_shader_clock;
@@ -731,6 +731,38 @@ nir_visitor::visit(ir_call *ir)
op = nir_intrinsic_memory_barrier_image;
} else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_shared") == 0) {
op = nir_intrinsic_memory_barrier_shared;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_load_shared") == 0) {
+ op = nir_intrinsic_load_shared;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_store_shared") == 0) {
+ op = nir_intrinsic_store_shared;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_add;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_and;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_or;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_xor;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_shared") == 0) {
+ assert(ir->return_deref);
+ if (ir->return_deref->type == glsl_type::int_type)
+ op = nir_intrinsic_shared_atomic_imin;
+ else if (ir->return_deref->type == glsl_type::uint_type)
+ op = nir_intrinsic_shared_atomic_umin;
+ else
+ unreachable("Invalid type");
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_max_shared") == 0) {
+ assert(ir->return_deref);
+ if (ir->return_deref->type == glsl_type::int_type)
+ op = nir_intrinsic_shared_atomic_imax;
+ else if (ir->return_deref->type == glsl_type::uint_type)
+ op = nir_intrinsic_shared_atomic_umax;
+ else
+ unreachable("Invalid type");
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_exchange;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_shared") == 0) {
+ op = nir_intrinsic_shared_atomic_comp_swap;
} else {
unreachable("not reached");
}
@@ -857,24 +889,12 @@ nir_visitor::visit(ir_call *ir)
ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
assert(write_mask);
- /* Check if we need the indirect version */
- ir_constant *const_offset = offset->as_constant();
- if (!const_offset) {
- op = nir_intrinsic_store_ssbo_indirect;
- ralloc_free(instr);
- instr = nir_intrinsic_instr_create(shader, op);
- instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset));
- instr->const_index[0] = 0;
- } else {
- instr->const_index[0] = const_offset->value.u[0];
- }
-
- instr->const_index[1] = write_mask->value.u[0];
-
instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val));
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block));
+ instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset));
+ instr->const_index[0] = write_mask->value.u[0];
instr->num_components = val->type->vector_elements;
- instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block));
nir_builder_instr_insert(&b, &instr->instr);
break;
}
@@ -885,20 +905,8 @@ nir_visitor::visit(ir_call *ir)
param = param->get_next();
ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
- /* Check if we need the indirect version */
- ir_constant *const_offset = offset->as_constant();
- if (!const_offset) {
- op = nir_intrinsic_load_ssbo_indirect;
- ralloc_free(instr);
- instr = nir_intrinsic_instr_create(shader, op);
- instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));
- instr->const_index[0] = 0;
- dest = &instr->dest;
- } else {
- instr->const_index[0] = const_offset->value.u[0];
- }
-
instr->src[0] = nir_src_for_ssa(evaluate_rvalue(block));
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));
const glsl_type *type = ir->return_deref->var->type;
instr->num_components = type->vector_elements;
@@ -978,6 +986,84 @@ nir_visitor::visit(ir_call *ir)
nir_builder_instr_insert(&b, &instr->instr);
break;
}
+ case nir_intrinsic_load_shared: {
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ instr->const_index[0] = 0;
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset));
+
+ const glsl_type *type = ir->return_deref->var->type;
+ instr->num_components = type->vector_elements;
+
+      /* Set up the destination register */
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ type->vector_elements, NULL);
+
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_store_shared: {
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+
+ param = param->get_next();
+ ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
+ assert(write_mask);
+
+ instr->const_index[0] = 0;
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));
+
+ instr->const_index[1] = write_mask->value.u[0];
+
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val));
+ instr->num_components = val->type->vector_elements;
+
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
+ case nir_intrinsic_shared_atomic_add:
+ case nir_intrinsic_shared_atomic_imin:
+ case nir_intrinsic_shared_atomic_umin:
+ case nir_intrinsic_shared_atomic_imax:
+ case nir_intrinsic_shared_atomic_umax:
+ case nir_intrinsic_shared_atomic_and:
+ case nir_intrinsic_shared_atomic_or:
+ case nir_intrinsic_shared_atomic_xor:
+ case nir_intrinsic_shared_atomic_exchange:
+ case nir_intrinsic_shared_atomic_comp_swap: {
+ int param_count = ir->actual_parameters.length();
+ assert(param_count == 2 || param_count == 3);
+
+ /* Offset */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_instruction *inst = (ir_instruction *) param;
+ instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+
+ /* data1 parameter (this is always present) */
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+
+ /* data2 parameter (only with atomic_comp_swap) */
+ if (param_count == 3) {
+ assert(op == nir_intrinsic_shared_atomic_comp_swap);
+ param = param->get_next();
+ inst = (ir_instruction *) param;
+ instr->src[2] =
+ nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));
+ }
+
+ /* Atomic result */
+ assert(ir->return_deref);
+ nir_ssa_dest_init(&instr->instr, &instr->dest,
+ ir->return_deref->type->vector_elements, NULL);
+ nir_builder_instr_insert(&b, &instr->instr);
+ break;
+ }
default:
unreachable("not reached");
}
@@ -1178,21 +1264,11 @@ nir_visitor::visit(ir_expression *ir)
/* Some special cases */
switch (ir->operation) {
case ir_binop_ubo_load: {
- ir_constant *const_index = ir->operands[1]->as_constant();
-
- nir_intrinsic_op op;
- if (const_index) {
- op = nir_intrinsic_load_ubo;
- } else {
- op = nir_intrinsic_load_ubo_indirect;
- }
-
- nir_intrinsic_instr *load = nir_intrinsic_instr_create(this->shader, op);
+ nir_intrinsic_instr *load =
+ nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo);
load->num_components = ir->type->vector_elements;
- load->const_index[0] = const_index ? const_index->value.u[0] : 0; /* base offset */
load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
- if (!const_index)
- load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
+ load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
add_instr(&load->instr, ir->type->vector_elements);
/*
diff --git a/src/glsl/nir/glsl_types.cpp b/src/glsl/nir/glsl_types.cpp
index 64b5c0cb106..bc8677ba6fc 100644
--- a/src/glsl/nir/glsl_types.cpp
+++ b/src/glsl/nir/glsl_types.cpp
@@ -22,7 +22,7 @@
*/
#include <stdio.h>
-#include "main/core.h" /* for Elements, MAX2 */
+#include "main/macros.h"
#include "glsl_parser_extras.h"
#include "glsl_types.h"
#include "util/hash_table.h"
diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 79df6d3df94..94bb76034a2 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1382,13 +1382,13 @@ static inline bool
foreach_if(nir_if *if_stmt, nir_foreach_block_cb cb, bool reverse, void *state)
{
if (reverse) {
- foreach_list_typed_safe_reverse(nir_cf_node, node, node,
+ foreach_list_typed_reverse_safe(nir_cf_node, node, node,
&if_stmt->else_list) {
if (!foreach_cf_node(node, cb, reverse, state))
return false;
}
- foreach_list_typed_safe_reverse(nir_cf_node, node, node,
+ foreach_list_typed_reverse_safe(nir_cf_node, node, node,
&if_stmt->then_list) {
if (!foreach_cf_node(node, cb, reverse, state))
return false;
@@ -1412,7 +1412,7 @@ static inline bool
foreach_loop(nir_loop *loop, nir_foreach_block_cb cb, bool reverse, void *state)
{
if (reverse) {
- foreach_list_typed_safe_reverse(nir_cf_node, node, node, &loop->body) {
+ foreach_list_typed_reverse_safe(nir_cf_node, node, node, &loop->body) {
if (!foreach_cf_node(node, cb, reverse, state))
return false;
}
@@ -1472,7 +1472,7 @@ nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb,
if (!cb(impl->end_block, state))
return false;
- foreach_list_typed_safe_reverse(nir_cf_node, node, node, &impl->body) {
+ foreach_list_typed_reverse_safe(nir_cf_node, node, node, &impl->body) {
if (!foreach_cf_node(node, cb, true, state))
return false;
}
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index b7374e17407..021c4280557 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1309,8 +1309,8 @@ nir_block_last_instr(nir_block *block)
foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list)
#define nir_foreach_instr_safe(block, instr) \
foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list)
-#define nir_foreach_instr_safe_reverse(block, instr) \
- foreach_list_typed_safe_reverse(nir_instr, instr, node, &(block)->instr_list)
+#define nir_foreach_instr_reverse_safe(block, instr) \
+ foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list)
typedef struct nir_if {
nir_cf_node cf_node;
@@ -2018,7 +2018,7 @@ void nir_assign_var_locations(struct exec_list *var_list,
void nir_lower_io(nir_shader *shader,
nir_variable_mode mode,
int (*type_size)(const struct glsl_type *));
-nir_src *nir_get_io_indirect_src(nir_intrinsic_instr *instr);
+nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr);
nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr);
void nir_lower_vars_to_ssa(nir_shader *shader);
diff --git a/src/glsl/nir/nir_constant_expressions.py b/src/glsl/nir/nir_constant_expressions.py
index b16ef503c92..32784f6398d 100644
--- a/src/glsl/nir/nir_constant_expressions.py
+++ b/src/glsl/nir/nir_constant_expressions.py
@@ -32,14 +32,6 @@ template = """\
#include "util/half_float.h"
#include "nir_constant_expressions.h"
-#if defined(__SUNPRO_CC)
-#include <ieeefp.h>
-static int isnormal(double x)
-{
- return fpclass(x) == FP_NORMAL;
-}
-#endif
-
/**
* Evaluate one component of packSnorm4x8.
*/
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index de30db61eea..5086e297e8e 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -222,6 +222,33 @@ INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0)
+/*
+ * CS shared variable atomic intrinsics
+ *
+ * All of the shared variable atomic memory operations read a value from
+ * memory, compute a new value using one of the operations below, write the
+ * new value to memory, and return the original value read.
+ *
+ * All operations take 2 sources, except CompSwap, which takes 3. These
+ * sources represent:
+ *
+ * 0: The offset into the shared variable storage region that the atomic
+ * operation will operate on.
+ * 1: The data parameter to the atomic function (i.e. the value to add
+ * in shared_atomic_add, etc).
+ * 2: For CompSwap only: the second data parameter.
+ */
+INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+
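
As a usage sketch (hypothetical, not part of this patch): emitting one of
these with the nir_builder helpers used elsewhere in this series, assuming a
nir_builder *b and nir_ssa_def *offset, *data in scope:

    /* shared_atomic_add following the layout documented above:
     * src[0] = offset, src[1] = data; the result is the old value. */
    nir_intrinsic_instr *atomic =
       nir_intrinsic_instr_create(b->shader, nir_intrinsic_shared_atomic_add);
    atomic->src[0] = nir_src_for_ssa(offset);
    atomic->src[1] = nir_src_for_ssa(data);
    nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL);
    nir_builder_instr_insert(b, &atomic->instr);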
#define SYSTEM_VALUE(name, components, num_indices) \
INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
@@ -247,56 +274,62 @@ SYSTEM_VALUE(num_work_groups, 3, 0)
SYSTEM_VALUE(helper_invocation, 1, 0)
/*
- * The format of the indices depends on the type of the load. For uniforms,
- * the first index is the base address and the second index is an offset that
- * should be added to the base address. (This way you can determine in the
- * back-end which variable is being accessed even in an array.) For inputs,
- * the one and only index corresponds to the attribute slot. UBO loads
- * have two indices the first of which is the descriptor set and the second
- * is the base address to load from.
+ * Load operations pull data from some piece of GPU memory. All load
+ * operations operate in terms of offsets into some piece of theoretical
+ * memory. Loads from externally visible memory (UBO and SSBO) simply take a
+ * byte offset as a source. Loads from opaque memory (uniforms, inputs, etc.)
+ * take a base+offset pair, where the base (const_index[0]) gives the location
+ * of the start of the variable being loaded and the offset source is an
+ * offset into that variable.
*
- * UBO loads have a (possibly constant) source which is the UBO buffer index.
- * For each type of load, the _indirect variant has one additional source
- * (the second in the case of UBO's) that is the is an indirect to be added to
- * the constant address or base offset to compute the final offset.
+ * Some load operations such as UBO/SSBO load and per_vertex loads take an
+ * additional source to specify which UBO/SSBO/vertex to load from.
*
- * For vector backends, the address is in terms of one vec4, and so each array
- * element is +4 scalar components from the previous array element. For scalar
- * backends, the address is in terms of a single 4-byte float/int and arrays
- * elements begin immediately after the previous array element.
+ * The exact address type depends on the lowering pass that generates the
+ * load/store intrinsics. Typically, this is vec4 units for things such as
+ * varying slots and float units for fragment shader inputs. UBO and SSBO
+ * offsets are always in bytes.
*/
-#define LOAD(name, extra_srcs, indices, flags) \
- INTRINSIC(load_##name, extra_srcs, ARR(1), true, 0, 0, indices, flags) \
- INTRINSIC(load_##name##_indirect, extra_srcs + 1, ARR(1, 1), \
- true, 0, 0, indices, flags)
+#define LOAD(name, srcs, indices, flags) \
+ INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags)
-LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-LOAD(ubo, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-LOAD(per_vertex_input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
-LOAD(output, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE)
-LOAD(per_vertex_output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
-LOAD(push_constant, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { offset }. const_index[] = { base } */
+LOAD(uniform, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { buffer_index, offset }. No const_index */
+LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { offset }. const_index[] = { base } */
+LOAD(input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { vertex, offset }. const_index[] = { base } */
+LOAD(per_vertex_input, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { buffer_index, offset }. No const_index */
+LOAD(ssbo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+/* src[] = { offset }. const_index[] = { base } */
+LOAD(output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+/* src[] = { vertex, offset }. const_index[] = { base } */
+LOAD(per_vertex_output, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+/* src[] = { offset }. const_index[] = { base } */
+LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+/* src[] = { offset }. const_index[] = { base, size } */
+LOAD(push_constant, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
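
To illustrate the unified scheme (a sketch, not code from this patch):
emitting a UBO load no longer chooses between direct and _indirect opcodes; a
constant offset is simply an immediate source that later passes can fold.
Assuming a nir_builder *b and a nir_ssa_def *block_index:

    nir_intrinsic_instr *load =
       nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_ubo);
    load->num_components = 4;
    load->src[0] = nir_src_for_ssa(block_index);        /* which UBO */
    load->src[1] = nir_src_for_ssa(nir_imm_int(b, 16)); /* byte offset */
    nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
    nir_builder_instr_insert(b, &load->instr);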
/*
- * Stores work the same way as loads, except now the first register input is
- * the value or array to store and the optional second input is the indirect
- * offset. SSBO stores are similar, but they accept an extra source for the
- * block index and an extra index with the writemask to use.
+ * Stores work the same way as loads, except now the first source is the value
+ * to store and the second (and possibly third) source specifies where to
+ * store the value. SSBO and shared memory stores also carry a write mask in
+ * const_index[] (const_index[0] for SSBO, const_index[1] for shared).
*/
-#define STORE(name, extra_srcs, extra_srcs_size, extra_indices, flags) \
- INTRINSIC(store_##name, 1 + extra_srcs, \
- ARR(0, extra_srcs_size, extra_srcs_size, extra_srcs_size), \
- false, 0, 0, 1 + extra_indices, flags) \
- INTRINSIC(store_##name##_indirect, 2 + extra_srcs, \
- ARR(0, 1, extra_srcs_size, extra_srcs_size), \
- false, 0, 0, 1 + extra_indices, flags)
+#define STORE(name, srcs, indices, flags) \
+ INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags)
-STORE(output, 0, 0, 0, 0)
-STORE(per_vertex_output, 1, 1, 0, 0)
-STORE(ssbo, 1, 1, 1, 0)
+/* src[] = { value, offset }. const_index[] = { base } */
+STORE(output, 2, 1, 0)
+/* src[] = { value, vertex, offset }. const_index[] = { base } */
+STORE(per_vertex_output, 3, 1, 0)
+/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
+STORE(ssbo, 3, 1, 0)
+/* src[] = { value, offset }. const_index[] = { base, write_mask } */
+STORE(shared, 2, 1, 0)
-LAST_INTRINSIC(store_ssbo_indirect)
+LAST_INTRINSIC(store_shared)
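
The store side, as a matching hypothetical sketch (value, block_index and
offset are assumed nir_ssa_def pointers): the value comes first and the SSBO
write mask now lives in const_index[0]:

    nir_intrinsic_instr *store =
       nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_ssbo);
    store->num_components = value->num_components;
    store->const_index[0] = (1 << value->num_components) - 1; /* write mask */
    store->src[0] = nir_src_for_ssa(value);       /* value to store */
    store->src[1] = nir_src_for_ssa(block_index); /* which SSBO */
    store->src[2] = nir_src_for_ssa(offset);      /* byte offset */
    nir_builder_instr_insert(b, &store->instr);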
diff --git a/src/glsl/nir/nir_lower_clip.c b/src/glsl/nir/nir_lower_clip.c
index c58c7785b3f..e2a2bb689a8 100644
--- a/src/glsl/nir/nir_lower_clip.c
+++ b/src/glsl/nir/nir_lower_clip.c
@@ -74,6 +74,7 @@ store_clipdist_output(nir_builder *b, nir_variable *out, nir_ssa_def **val)
store->const_index[0] = out->data.driver_location;
store->src[0].ssa = nir_vec4(b, val[0], val[1], val[2], val[3]);
store->src[0].is_ssa = true;
+ store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
nir_builder_instr_insert(b, &store->instr);
}
@@ -85,6 +86,7 @@ load_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val)
load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
load->num_components = 4;
load->const_index[0] = in->data.driver_location;
+ load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
nir_builder_instr_insert(b, &load->instr);
@@ -112,6 +114,7 @@ find_output_in_block(nir_block *block, void *void_state)
intr->const_index[0] == state->drvloc) {
assert(state->def == NULL);
assert(intr->src[0].is_ssa);
+ assert(nir_src_as_const_value(intr->src[1]));
state->def = intr->src[0].ssa;
#if !defined(DEBUG)
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
index 5683e69d865..ec6d09d5b6d 100644
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -86,18 +86,11 @@ is_per_vertex_output(struct lower_io_state *state, nir_variable *var)
stage == MESA_SHADER_TESS_CTRL;
}
-static unsigned
-get_io_offset(nir_deref_var *deref, nir_instr *instr,
+static nir_ssa_def *
+get_io_offset(nir_builder *b, nir_deref_var *deref,
nir_ssa_def **vertex_index,
- nir_ssa_def **out_indirect,
- struct lower_io_state *state)
+ int (*type_size)(const struct glsl_type *))
{
- nir_ssa_def *indirect = NULL;
- unsigned base_offset = 0;
-
- nir_builder *b = &state->builder;
- b->cursor = nir_before_instr(instr);
-
nir_deref *tail = &deref->deref;
/* For per-vertex input arrays (i.e. geometry shader inputs), keep the
@@ -115,64 +108,57 @@ get_io_offset(nir_deref_var *deref, nir_instr *instr,
*vertex_index = vtx;
}
+ /* Just emit code and let constant-folding go to town */
+ nir_ssa_def *offset = nir_imm_int(b, 0);
+
while (tail->child != NULL) {
const struct glsl_type *parent_type = tail->type;
tail = tail->child;
if (tail->deref_type == nir_deref_type_array) {
nir_deref_array *deref_array = nir_deref_as_array(tail);
- unsigned size = state->type_size(tail->type);
+ unsigned size = type_size(tail->type);
- base_offset += size * deref_array->base_offset;
+ offset = nir_iadd(b, offset,
+ nir_imm_int(b, size * deref_array->base_offset));
if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
nir_ssa_def *mul =
nir_imul(b, nir_imm_int(b, size),
nir_ssa_for_src(b, deref_array->indirect, 1));
- indirect = indirect ? nir_iadd(b, indirect, mul) : mul;
+ offset = nir_iadd(b, offset, mul);
}
} else if (tail->deref_type == nir_deref_type_struct) {
nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
+ unsigned field_offset = 0;
for (unsigned i = 0; i < deref_struct->index; i++) {
- base_offset +=
- state->type_size(glsl_get_struct_field(parent_type, i));
+ field_offset += type_size(glsl_get_struct_field(parent_type, i));
}
+ offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
}
}
- *out_indirect = indirect;
- return base_offset;
+ return offset;
}
static nir_intrinsic_op
load_op(struct lower_io_state *state,
- nir_variable_mode mode, bool per_vertex, bool has_indirect)
+ nir_variable_mode mode, bool per_vertex)
{
nir_intrinsic_op op;
switch (mode) {
case nir_var_shader_in:
- if (per_vertex) {
- op = has_indirect ? nir_intrinsic_load_per_vertex_input_indirect :
- nir_intrinsic_load_per_vertex_input;
- } else {
- op = has_indirect ? nir_intrinsic_load_input_indirect :
- nir_intrinsic_load_input;
- }
+ op = per_vertex ? nir_intrinsic_load_per_vertex_input :
+ nir_intrinsic_load_input;
break;
case nir_var_shader_out:
- if (per_vertex) {
- op = has_indirect ? nir_intrinsic_load_per_vertex_output_indirect :
- nir_intrinsic_load_per_vertex_output;
- } else {
- op = has_indirect ? nir_intrinsic_load_output_indirect :
- nir_intrinsic_load_output;
- }
+ op = per_vertex ? nir_intrinsic_load_per_vertex_output :
+ nir_intrinsic_load_output;
break;
case nir_var_uniform:
- op = has_indirect ? nir_intrinsic_load_uniform_indirect :
- nir_intrinsic_load_uniform;
+ op = nir_intrinsic_load_uniform;
break;
default:
unreachable("Unknown variable mode");
@@ -185,6 +171,8 @@ nir_lower_io_block(nir_block *block, void *void_state)
{
struct lower_io_state *state = void_state;
+ nir_builder *b = &state->builder;
+
nir_foreach_instr_safe(block, instr) {
if (instr->type != nir_instr_type_intrinsic)
continue;
@@ -205,38 +193,33 @@ nir_lower_io_block(nir_block *block, void *void_state)
mode != nir_var_uniform)
continue;
+ b->cursor = nir_before_instr(instr);
+
switch (intrin->intrinsic) {
case nir_intrinsic_load_var: {
bool per_vertex =
is_per_vertex_input(state, intrin->variables[0]->var) ||
is_per_vertex_output(state, intrin->variables[0]->var);
- nir_ssa_def *indirect;
+ nir_ssa_def *offset;
nir_ssa_def *vertex_index;
- unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr,
- per_vertex ? &vertex_index : NULL,
- &indirect, state);
+ offset = get_io_offset(b, intrin->variables[0],
+ per_vertex ? &vertex_index : NULL,
+ state->type_size);
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(state->mem_ctx,
- load_op(state, mode, per_vertex,
- indirect));
+ load_op(state, mode, per_vertex));
load->num_components = intrin->num_components;
- unsigned location = intrin->variables[0]->var->data.driver_location;
- if (mode == nir_var_uniform) {
- load->const_index[0] = location;
- load->const_index[1] = offset;
- } else {
- load->const_index[0] = location + offset;
- }
+ load->const_index[0] =
+ intrin->variables[0]->var->data.driver_location;
if (per_vertex)
load->src[0] = nir_src_for_ssa(vertex_index);
- if (indirect)
- load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(indirect);
+ load->src[per_vertex ? 1 : 0] = nir_src_for_ssa(offset);
if (intrin->dest.is_ssa) {
nir_ssa_dest_init(&load->instr, &load->dest,
@@ -255,38 +238,33 @@ nir_lower_io_block(nir_block *block, void *void_state)
case nir_intrinsic_store_var: {
assert(mode == nir_var_shader_out);
- nir_ssa_def *indirect;
+ nir_ssa_def *offset;
nir_ssa_def *vertex_index;
bool per_vertex =
is_per_vertex_output(state, intrin->variables[0]->var);
- unsigned offset = get_io_offset(intrin->variables[0], &intrin->instr,
- per_vertex ? &vertex_index : NULL,
- &indirect, state);
- offset += intrin->variables[0]->var->data.driver_location;
+ offset = get_io_offset(b, intrin->variables[0],
+ per_vertex ? &vertex_index : NULL,
+ state->type_size);
- nir_intrinsic_op store_op;
- if (per_vertex) {
- store_op = indirect ? nir_intrinsic_store_per_vertex_output_indirect
- : nir_intrinsic_store_per_vertex_output;
- } else {
- store_op = indirect ? nir_intrinsic_store_output_indirect
- : nir_intrinsic_store_output;
- }
+ nir_intrinsic_op store_op =
+ per_vertex ? nir_intrinsic_store_per_vertex_output :
+ nir_intrinsic_store_output;
nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
store_op);
store->num_components = intrin->num_components;
- store->const_index[0] = offset;
nir_src_copy(&store->src[0], &intrin->src[0], store);
+ store->const_index[0] =
+ intrin->variables[0]->var->data.driver_location;
+
if (per_vertex)
store->src[1] = nir_src_for_ssa(vertex_index);
- if (indirect)
- store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(indirect);
+ store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(offset);
nir_instr_insert_before(&intrin->instr, &store->instr);
nir_instr_remove(&intrin->instr);
@@ -330,21 +308,24 @@ nir_lower_io(nir_shader *shader, nir_variable_mode mode,
}
/**
- * Return the indirect source for a load/store indirect intrinsic.
+ * Return the offset source for a load/store intrinsic.
*/
nir_src *
-nir_get_io_indirect_src(nir_intrinsic_instr *instr)
+nir_get_io_offset_src(nir_intrinsic_instr *instr)
{
switch (instr->intrinsic) {
- case nir_intrinsic_load_input_indirect:
- case nir_intrinsic_load_output_indirect:
- case nir_intrinsic_load_uniform_indirect:
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_output:
+ case nir_intrinsic_load_uniform:
return &instr->src[0];
- case nir_intrinsic_load_per_vertex_input_indirect:
- case nir_intrinsic_load_per_vertex_output_indirect:
- case nir_intrinsic_store_output_indirect:
+ case nir_intrinsic_load_ubo:
+ case nir_intrinsic_load_ssbo:
+ case nir_intrinsic_load_per_vertex_input:
+ case nir_intrinsic_load_per_vertex_output:
+ case nir_intrinsic_store_output:
return &instr->src[1];
- case nir_intrinsic_store_per_vertex_output_indirect:
+ case nir_intrinsic_store_ssbo:
+ case nir_intrinsic_store_per_vertex_output:
return &instr->src[2];
default:
return NULL;
@@ -360,11 +341,8 @@ nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
switch (instr->intrinsic) {
case nir_intrinsic_load_per_vertex_input:
case nir_intrinsic_load_per_vertex_output:
- case nir_intrinsic_load_per_vertex_input_indirect:
- case nir_intrinsic_load_per_vertex_output_indirect:
return &instr->src[0];
case nir_intrinsic_store_per_vertex_output:
- case nir_intrinsic_store_per_vertex_output_indirect:
return &instr->src[1];
default:
return NULL;
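
On the consumer side (a hedged example; this helper is not in the patch), a
backend now distinguishes direct from indirect access by inspecting the offset
source rather than the opcode. For the base+offset intrinsics (inputs,
outputs, uniforms; callers are assumed not to pass UBO/SSBO intrinsics, which
have no base index):

    static bool
    io_access_is_direct(nir_intrinsic_instr *intrin, unsigned *out_location)
    {
       nir_src *off_src = nir_get_io_offset_src(intrin);
       if (off_src == NULL)
          return false; /* not a lowered I/O intrinsic */

       nir_const_value *cv = nir_src_as_const_value(*off_src);
       if (cv == NULL)
          return false; /* truly indirect: offset known only at run time */

       /* base (const_index[0]) plus the constant-folded offset */
       *out_location = intrin->const_index[0] + cv->u[0];
       return true;
    }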
diff --git a/src/glsl/nir/nir_lower_phis_to_scalar.c b/src/glsl/nir/nir_lower_phis_to_scalar.c
index aa124d9e6cc..2f5927f6406 100644
--- a/src/glsl/nir/nir_lower_phis_to_scalar.c
+++ b/src/glsl/nir/nir_lower_phis_to_scalar.c
@@ -91,13 +91,9 @@ is_phi_src_scalarizable(nir_phi_src *src,
case nir_intrinsic_interp_var_at_sample:
case nir_intrinsic_interp_var_at_offset:
case nir_intrinsic_load_uniform:
- case nir_intrinsic_load_uniform_indirect:
case nir_intrinsic_load_ubo:
- case nir_intrinsic_load_ubo_indirect:
case nir_intrinsic_load_ssbo:
- case nir_intrinsic_load_ssbo_indirect:
case nir_intrinsic_load_input:
- case nir_intrinsic_load_input_indirect:
return true;
default:
break;
diff --git a/src/glsl/nir/nir_lower_samplers.c b/src/glsl/nir/nir_lower_samplers.c
index 19deafab37a..858088237e3 100644
--- a/src/glsl/nir/nir_lower_samplers.c
+++ b/src/glsl/nir/nir_lower_samplers.c
@@ -25,7 +25,6 @@
#include "nir.h"
#include "nir_builder.h"
-#include "../program.h"
#include "program/hash_table.h"
#include "ir_uniform.h"
diff --git a/src/glsl/nir/nir_lower_two_sided_color.c b/src/glsl/nir/nir_lower_two_sided_color.c
index 6995b9d6bc1..7df12e070f1 100644
--- a/src/glsl/nir/nir_lower_two_sided_color.c
+++ b/src/glsl/nir/nir_lower_two_sided_color.c
@@ -73,6 +73,7 @@ load_input(nir_builder *b, nir_variable *in)
load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
load->num_components = 4;
load->const_index[0] = in->data.driver_location;
+ load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
nir_builder_instr_insert(b, &load->instr);
@@ -151,6 +152,7 @@ nir_lower_two_sided_color_block(nir_block *block, void *void_state)
unsigned drvloc =
state->colors[idx].front->data.driver_location;
if (intr->const_index[0] == drvloc) {
+ assert(nir_src_as_const_value(intr->src[0]));
break;
}
}
diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py
index 30ede52b146..3843f21c0ee 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -185,8 +185,10 @@ optimizations = [
(('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'),
(('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'),
# Boolean simplifications
- (('ine', 'a@bool', 0), 'a'),
- (('ieq', 'a@bool', 0), ('inot', 'a')),
+ (('ieq', 'a@bool', True), a),
+ (('ine', 'a@bool', True), ('inot', a)),
+ (('ine', 'a@bool', False), a),
+   (('ieq', 'a@bool', False), ('inot', a)),
(('bcsel', a, True, False), ('ine', a, 0)),
(('bcsel', a, False, True), ('ieq', a, 0)),
(('bcsel', True, b, c), b),
diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index 76bfc47c2a0..10f46cef1de 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -439,21 +439,15 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
switch (instr->intrinsic) {
case nir_intrinsic_load_uniform:
- case nir_intrinsic_load_uniform_indirect:
var_list = &state->shader->uniforms;
break;
case nir_intrinsic_load_input:
- case nir_intrinsic_load_input_indirect:
case nir_intrinsic_load_per_vertex_input:
- case nir_intrinsic_load_per_vertex_input_indirect:
var_list = &state->shader->inputs;
break;
case nir_intrinsic_load_output:
- case nir_intrinsic_load_output_indirect:
case nir_intrinsic_store_output:
- case nir_intrinsic_store_output_indirect:
case nir_intrinsic_store_per_vertex_output:
- case nir_intrinsic_store_per_vertex_output_indirect:
var_list = &state->shader->outputs;
break;
default:
diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c
index d014f3cd811..68edea09309 100644
--- a/src/glsl/nir/spirv_to_nir.c
+++ b/src/glsl/nir/spirv_to_nir.c
@@ -1112,8 +1112,7 @@ nir_vulkan_resource_index(nir_builder *b, unsigned set, unsigned binding,
static struct vtn_ssa_value *
_vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op,
unsigned set, unsigned binding, nir_variable_mode mode,
- nir_ssa_def *index, unsigned offset, nir_ssa_def *indirect,
- struct vtn_type *type)
+ nir_ssa_def *index, nir_ssa_def *offset, struct vtn_type *type)
{
struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value);
val->type = type->type;
@@ -1121,26 +1120,20 @@ _vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op,
if (glsl_type_is_vector_or_scalar(type->type)) {
nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
load->num_components = glsl_get_vector_elements(type->type);
- load->const_index[0] = offset;
switch (op) {
- case nir_intrinsic_load_ubo_indirect:
- case nir_intrinsic_load_ssbo_indirect:
- load->src[1] = nir_src_for_ssa(indirect);
- /* fall through */
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ssbo: {
nir_ssa_def *res_index = nir_vulkan_resource_index(&b->nb,
set, binding,
mode, index);
load->src[0] = nir_src_for_ssa(res_index);
+ load->src[1] = nir_src_for_ssa(offset);
break;
}
case nir_intrinsic_load_push_constant:
- break; /* Nothing to do */
- case nir_intrinsic_load_push_constant_indirect:
- load->src[0] = nir_src_for_ssa(indirect);
+ load->src[0] = nir_src_for_ssa(offset);
break;
default:
@@ -1155,15 +1148,17 @@ _vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op,
val->elems = ralloc_array(b, struct vtn_ssa_value *, elems);
if (glsl_type_is_struct(type->type)) {
for (unsigned i = 0; i < elems; i++) {
+ nir_ssa_def *child_offset =
+ nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i]));
val->elems[i] = _vtn_block_load(b, op, set, binding, mode, index,
- offset + type->offsets[i],
- indirect, type->members[i]);
+ child_offset, type->members[i]);
}
} else {
for (unsigned i = 0; i < elems; i++) {
+ nir_ssa_def *child_offset =
+ nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride));
val->elems[i] = _vtn_block_load(b, op, set, binding, mode, index,
- offset + i * type->stride,
- indirect, type->array_element);
+                                         child_offset, type->array_element);
}
}
}
@@ -1174,8 +1169,7 @@ _vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op,
static void
vtn_block_get_offset(struct vtn_builder *b, nir_deref_var *src,
struct vtn_type **type, nir_deref *src_tail,
- nir_ssa_def **index,
- unsigned *offset, nir_ssa_def **indirect)
+ nir_ssa_def **index, nir_ssa_def **offset)
{
nir_deref *deref = &src->deref;
@@ -1191,27 +1185,30 @@ vtn_block_get_offset(struct vtn_builder *b, nir_deref_var *src,
*index = nir_imm_int(&b->nb, 0);
}
- *offset = 0;
- *indirect = NULL;
+ *offset = nir_imm_int(&b->nb, 0);
while (deref != src_tail) {
deref = deref->child;
switch (deref->deref_type) {
case nir_deref_type_array: {
nir_deref_array *deref_array = nir_deref_as_array(deref);
- if (deref_array->deref_array_type == nir_deref_array_type_direct) {
- *offset += (*type)->stride * deref_array->base_offset;
- } else {
- nir_ssa_def *off = nir_imul(&b->nb, deref_array->indirect.ssa,
- nir_imm_int(&b->nb, (*type)->stride));
- *indirect = *indirect ? nir_iadd(&b->nb, *indirect, off) : off;
- }
+ nir_ssa_def *off = nir_imm_int(&b->nb, deref_array->base_offset);
+
+ if (deref_array->deref_array_type == nir_deref_array_type_indirect)
+ off = nir_iadd(&b->nb, off, deref_array->indirect.ssa);
+
+ off = nir_imul(&b->nb, off, nir_imm_int(&b->nb, (*type)->stride));
+ *offset = nir_iadd(&b->nb, *offset, off);
+
*type = (*type)->array_element;
break;
}
case nir_deref_type_struct: {
nir_deref_struct *deref_struct = nir_deref_as_struct(deref);
- *offset += (*type)->offsets[deref_struct->index];
+
+ unsigned elem_off = (*type)->offsets[deref_struct->index];
+ *offset = nir_iadd(&b->nb, *offset, nir_imm_int(&b->nb, elem_off));
+
*type = (*type)->members[deref_struct->index];
break;
}
@@ -1227,9 +1224,8 @@ vtn_block_load(struct vtn_builder *b, nir_deref_var *src,
struct vtn_type *type, nir_deref *src_tail)
{
nir_ssa_def *index;
- unsigned offset;
- nir_ssa_def *indirect;
- vtn_block_get_offset(b, src, &type, src_tail, &index, &offset, &indirect);
+ nir_ssa_def *offset;
+ vtn_block_get_offset(b, src, &type, src_tail, &index, &offset);
nir_intrinsic_op op;
if (src->var->data.mode == nir_var_uniform) {
@@ -1237,25 +1233,22 @@ vtn_block_load(struct vtn_builder *b, nir_deref_var *src,
/* UBO load */
assert(src->var->data.binding >= 0);
- op = indirect ? nir_intrinsic_load_ubo_indirect
- : nir_intrinsic_load_ubo;
+ op = nir_intrinsic_load_ubo;
} else {
/* Push constant load */
assert(src->var->data.descriptor_set == -1 &&
src->var->data.binding == -1);
- op = indirect ? nir_intrinsic_load_push_constant_indirect
- : nir_intrinsic_load_push_constant;
+ op = nir_intrinsic_load_push_constant;
}
} else {
assert(src->var->data.mode == nir_var_shader_storage);
- op = indirect ? nir_intrinsic_load_ssbo_indirect
- : nir_intrinsic_load_ssbo;
+ op = nir_intrinsic_load_ssbo;
}
return _vtn_block_load(b, op, src->var->data.descriptor_set,
src->var->data.binding, src->var->data.mode,
- index, offset, indirect, type);
+ index, offset, type);
}
/*
@@ -1319,14 +1312,13 @@ vtn_variable_load(struct vtn_builder *b, nir_deref_var *src,
static void
_vtn_block_store(struct vtn_builder *b, nir_intrinsic_op op,
struct vtn_ssa_value *src, unsigned set, unsigned binding,
- nir_variable_mode mode, nir_ssa_def *index, unsigned offset,
- nir_ssa_def *indirect, struct vtn_type *type)
+ nir_variable_mode mode, nir_ssa_def *index,
+ nir_ssa_def *offset, struct vtn_type *type)
{
assert(src->type == type->type);
if (glsl_type_is_vector_or_scalar(type->type)) {
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
store->num_components = glsl_get_vector_elements(type->type);
- store->const_index[0] = offset;
store->const_index[1] = (1 << store->num_components) - 1;
store->src[0] = nir_src_for_ssa(src->def);
@@ -1334,24 +1326,24 @@ _vtn_block_store(struct vtn_builder *b, nir_intrinsic_op op,
set, binding,
mode, index);
store->src[1] = nir_src_for_ssa(res_index);
-
- if (op == nir_intrinsic_store_ssbo_indirect)
- store->src[2] = nir_src_for_ssa(indirect);
+ store->src[2] = nir_src_for_ssa(offset);
nir_builder_instr_insert(&b->nb, &store->instr);
} else {
unsigned elems = glsl_get_length(type->type);
if (glsl_type_is_struct(type->type)) {
for (unsigned i = 0; i < elems; i++) {
+ nir_ssa_def *child_offset =
+ nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, type->offsets[i]));
_vtn_block_store(b, op, src->elems[i], set, binding, mode,
- index, offset + type->offsets[i], indirect,
- type->members[i]);
+ index, child_offset, type->members[i]);
}
} else {
for (unsigned i = 0; i < elems; i++) {
+ nir_ssa_def *child_offset =
+ nir_iadd(&b->nb, offset, nir_imm_int(&b->nb, i * type->stride));
_vtn_block_store(b, op, src->elems[i], set, binding, mode,
- index, offset + i * type->stride, indirect,
- type->array_element);
+ index, child_offset, type->array_element);
}
}
}
@@ -1363,16 +1355,14 @@ vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src,
nir_deref *dest_tail)
{
nir_ssa_def *index;
- unsigned offset;
- nir_ssa_def *indirect;
- vtn_block_get_offset(b, dest, &type, dest_tail, &index, &offset, &indirect);
+ nir_ssa_def *offset;
+ vtn_block_get_offset(b, dest, &type, dest_tail, &index, &offset);
- nir_intrinsic_op op = indirect ? nir_intrinsic_store_ssbo_indirect
- : nir_intrinsic_store_ssbo;
+ nir_intrinsic_op op = nir_intrinsic_store_ssbo;
return _vtn_block_store(b, op, src, dest->var->data.descriptor_set,
dest->var->data.binding, dest->var->data.mode,
- index, offset, indirect, type);
+ index, offset, type);
}
static nir_ssa_def * vtn_vector_insert(struct vtn_builder *b,
@@ -1545,7 +1535,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
/* We have exactly one push constant block */
assert(b->shader->num_uniforms == 0);
- b->shader->num_uniforms = vtn_type_block_size(type);
+ b->shader->num_uniforms = vtn_type_block_size(type) * 4;
break;
case SpvStorageClassInput:
var->data.mode = nir_var_shader_in;
diff --git a/src/glsl/opt_array_splitting.cpp b/src/glsl/opt_array_splitting.cpp
index 9e73f3c44bb..89ce76bed2b 100644
--- a/src/glsl/opt_array_splitting.cpp
+++ b/src/glsl/opt_array_splitting.cpp
@@ -188,6 +188,10 @@ ir_array_reference_visitor::visit_enter(ir_dereference_array *ir)
if (entry && !ir->array_index->as_constant())
entry->split = false;
+   /* If the index is itself an array dereference (e.g. a[b[i]]), visit it too. */
+ if (ir->array_index->as_dereference_array())
+ visit_enter(ir->array_index->as_dereference_array());
+
return visit_continue_with_parent;
}
diff --git a/src/glsl/opt_constant_propagation.cpp b/src/glsl/opt_constant_propagation.cpp
index 184aaa1c297..fb24a4fad04 100644
--- a/src/glsl/opt_constant_propagation.cpp
+++ b/src/glsl/opt_constant_propagation.cpp
@@ -500,7 +500,8 @@ ir_constant_propagation_visitor::add_constant(ir_assignment *ir)
* the variable value isn't modified between this assignment and the next
* instruction where its value is read.
*/
- if (deref->var->data.mode == ir_var_shader_storage)
+ if (deref->var->data.mode == ir_var_shader_storage ||
+ deref->var->data.mode == ir_var_shader_shared)
return;
entry = new(this->mem_ctx) acp_entry(deref->var, ir->write_mask, constant);
diff --git a/src/glsl/opt_constant_variable.cpp b/src/glsl/opt_constant_variable.cpp
index cdfbc340243..56f6a819e1e 100644
--- a/src/glsl/opt_constant_variable.cpp
+++ b/src/glsl/opt_constant_variable.cpp
@@ -120,7 +120,8 @@ ir_constant_variable_visitor::visit_enter(ir_assignment *ir)
* and we can't be sure that this variable won't be written by another
* thread.
*/
- if (var->data.mode == ir_var_shader_storage)
+ if (var->data.mode == ir_var_shader_storage ||
+ var->data.mode == ir_var_shader_shared)
return visit_continue;
constval = ir->rhs->constant_expression_value();
diff --git a/src/glsl/opt_copy_propagation.cpp b/src/glsl/opt_copy_propagation.cpp
index f20699563fd..5d4cb4fe613 100644
--- a/src/glsl/opt_copy_propagation.cpp
+++ b/src/glsl/opt_copy_propagation.cpp
@@ -330,7 +330,8 @@ ir_copy_propagation_visitor::add_copy(ir_assignment *ir)
*/
ir->condition = new(ralloc_parent(ir)) ir_constant(false);
this->progress = true;
- } else if (lhs_var->data.mode != ir_var_shader_storage) {
+ } else if (lhs_var->data.mode != ir_var_shader_storage &&
+ lhs_var->data.mode != ir_var_shader_shared) {
entry = new(this->acp) acp_entry(lhs_var, rhs_var);
this->acp->push_tail(entry);
}
diff --git a/src/glsl/opt_dead_builtin_varyings.cpp b/src/glsl/opt_dead_builtin_varyings.cpp
index 68b70eedf92..53871130e12 100644
--- a/src/glsl/opt_dead_builtin_varyings.cpp
+++ b/src/glsl/opt_dead_builtin_varyings.cpp
@@ -85,7 +85,7 @@ public:
{
ir_variable *var = ir->variable_referenced();
- if (!var || var->data.mode != this->mode)
+ if (!var || var->data.mode != this->mode || !var->type->is_array())
return visit_continue;
if (this->find_frag_outputs && var->data.location == FRAG_RESULT_DATA0) {
diff --git a/src/glsl/opt_dead_code.cpp b/src/glsl/opt_dead_code.cpp
index c5be166e75a..c2ce0b94ece 100644
--- a/src/glsl/opt_dead_code.cpp
+++ b/src/glsl/opt_dead_code.cpp
@@ -75,6 +75,20 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned)
|| !entry->declaration)
continue;
+ /* Section 7.4.1 (Shader Interface Matching) of the OpenGL 4.5
+ * (Core Profile) spec says:
+ *
+ * "With separable program objects, interfaces between shader
+ * stages may involve the outputs from one program object and the
+ * inputs from a second program object. For such interfaces, it is
+ * not possible to detect mismatches at link time, because the
+ * programs are linked separately. When each such program is
+ * linked, all inputs or outputs interfacing with another program
+ * stage are treated as active."
+ */
+ if (entry->var->data.always_active_io)
+ continue;
+
if (!entry->assign_list.is_empty()) {
/* Remove all the dead assignments to the variable we found.
* Don't do so if it's a shader or function output, though.
diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp
index 7d59c787aed..84266b0cb58 100644
--- a/src/glsl/standalone_scaffolding.cpp
+++ b/src/glsl/standalone_scaffolding.cpp
@@ -69,7 +69,7 @@ _mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
void
_mesa_shader_debug(struct gl_context *, GLenum, GLuint *,
- const char *, int)
+ const char *)
{
}
diff --git a/src/glsl/standalone_scaffolding.h b/src/glsl/standalone_scaffolding.h
index a9ca5e4e3d3..f853a187bf4 100644
--- a/src/glsl/standalone_scaffolding.h
+++ b/src/glsl/standalone_scaffolding.h
@@ -52,7 +52,7 @@ _mesa_clear_shader_program_data(struct gl_shader_program *);
extern "C" void
_mesa_shader_debug(struct gl_context *ctx, GLenum type, GLuint *id,
- const char *msg, int len);
+ const char *msg);
static inline gl_shader_stage
_mesa_shader_enum_to_shader_stage(GLenum v)