Diffstat (limited to 'src/glsl')
42 files changed, 2131 insertions, 740 deletions
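Beyond housekeeping (the Android.gen.mk include path, moving builtin_type_macros.h into the NIR file list, the new nir/nir_clone.c), the bulk of this diff reworks how GLSL layout-qualifier values are stored and validated: binding, location, index, stream, offset, max_vertices, invocations, vertices, and local_size_* change from plain integers in ast_type_qualifier to unresolved AST expressions (ast_expression * or the new ast_layout_expression *). Instead of comparing integer values eagerly in merge_qualifier(), repeated declarations now append their expressions to a single ast_layout_expression, and ast_to_hir folds everything to constants via process_qualifier_constant(), reporting negative, out-of-range, and mismatched values at that point. Sketches of the compute local-size validation and the new dual-source blending checks follow the diff; first, a minimal standalone model of the collect-then-fold pattern (the class below is an illustrative stand-in, not Mesa's actual type):

#include <cstdio>
#include <functional>
#include <vector>

// Toy stand-in for ast_layout_expression: collects every expression written
// for one qualifier and validates them together once they can be folded.
class layout_expression {
public:
   void append(std::function<int()> expr) { exprs.push_back(std::move(expr)); }

   // Mirrors ast_layout_expression::process_qualifier_constant(): every
   // expression must fold to the same constant, and the constant must be
   // at least 1 unless the qualifier is allowed to be zero.
   bool process_qualifier_constant(const char *name, unsigned *value,
                                   bool can_be_zero) const {
      const int min_value = can_be_zero ? 0 : 1;
      bool first_pass = true;
      *value = 0;
      for (const auto &expr : exprs) {
         const int v = expr(); // in Mesa: hir() + constant_expression_value()
         if (v < min_value) {
            fprintf(stderr, "%s layout qualifier is invalid (%d < %d)\n",
                    name, v, min_value);
            return false;
         }
         if (!first_pass && *value != unsigned(v)) {
            fprintf(stderr, "%s layout qualifier does not match previous "
                    "declaration (%u vs %d)\n", name, *value, v);
            return false;
         }
         first_pass = false;
         *value = unsigned(v);
      }
      return true;
   }

private:
   std::vector<std::function<int()>> exprs;
};

int main() {
   layout_expression vertices;            // e.g. two `layout(vertices = N) out;` lines
   vertices.append([] { return 4; });
   vertices.append([] { return 2 + 2; }); // a constant expression with the same value: OK
   unsigned value = 0;
   if (vertices.process_qualifier_constant("vertices", &value, false))
      printf("vertices = %u\n", value);
   return 0;
}

Deferring evaluation this way is what lets a qualifier carry any integral constant expression rather than a literal, and it centralizes the "does not match previous declaration" diagnostics in one place instead of scattering them across the merge paths.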
diff --git a/src/glsl/Android.gen.mk b/src/glsl/Android.gen.mk index 6898fb0d492..59cc8577a6e 100644 --- a/src/glsl/Android.gen.mk +++ b/src/glsl/Android.gen.mk @@ -38,7 +38,8 @@ LOCAL_C_INCLUDES += \ $(MESA_TOP)/src/glsl/nir LOCAL_EXPORT_C_INCLUDE_DIRS += \ - $(intermediates)/nir + $(intermediates)/nir \ + $(MESA_TOP)/src/glsl/nir LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \ $(LIBGLCPP_GENERATED_FILES) \ diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 957fd6b90ba..0c9fd75d206 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -22,10 +22,12 @@ NIR_FILES = \ nir/glsl_to_nir.h \ nir/glsl_types.cpp \ nir/glsl_types.h \ + nir/builtin_type_macros.h \ nir/nir.c \ nir/nir.h \ nir/nir_array.h \ nir/nir_builder.h \ + nir/nir_clone.c \ nir/nir_constant_expressions.h \ nir/nir_control_flow.c \ nir/nir_control_flow.h \ @@ -102,7 +104,6 @@ LIBGLSL_FILES = \ blob.c \ blob.h \ builtin_functions.cpp \ - builtin_type_macros.h \ builtin_types.cpp \ builtin_variables.cpp \ glsl_parser_extras.cpp \ diff --git a/src/glsl/ast.h b/src/glsl/ast.h index 1b75234d578..3bea63ea0ed 100644 --- a/src/glsl/ast.h +++ b/src/glsl/ast.h @@ -336,7 +336,7 @@ public: array_dimensions.push_tail(&dim->link); } - const bool is_single_dimension() + bool is_single_dimension() const { return this->array_dimensions.tail_pred->prev != NULL && this->array_dimensions.tail_pred->prev->is_head_sentinel(); @@ -350,6 +350,26 @@ public: exec_list array_dimensions; }; +class ast_layout_expression : public ast_node { +public: + ast_layout_expression(const struct YYLTYPE &locp, ast_expression *expr) + { + set_location(locp); + layout_const_expressions.push_tail(&expr->link); + } + + bool process_qualifier_constant(struct _mesa_glsl_parse_state *state, + const char *qual_indentifier, + unsigned *value, bool can_be_zero); + + void merge_qualifier(ast_layout_expression *l_expr) + { + layout_const_expressions.append_list(&l_expr->layout_const_expressions); + } + + exec_list layout_const_expressions; +}; + /** * C-style aggregate initialization class * @@ -558,7 +578,7 @@ struct ast_type_qualifier { unsigned precision:2; /** Geometry shader invocations for GL_ARB_gpu_shader5. */ - int invocations; + ast_layout_expression *invocations; /** * Location specified via GL_ARB_explicit_attrib_location layout @@ -566,20 +586,20 @@ struct ast_type_qualifier { * \note * This field is only valid if \c explicit_location is set. */ - int location; + ast_expression *location; /** * Index specified via GL_ARB_explicit_attrib_location layout * * \note * This field is only valid if \c explicit_index is set. */ - int index; + ast_expression *index; /** Maximum output vertices in GLSL 1.50 geometry shaders. */ - int max_vertices; + ast_layout_expression *max_vertices; /** Stream in GLSL 1.50 geometry shaders. */ - unsigned stream; + ast_expression *stream; /** * Input or output primitive type in GLSL 1.50 geometry shaders @@ -593,7 +613,7 @@ struct ast_type_qualifier { * \note * This field is only valid if \c explicit_binding is set. */ - int binding; + ast_expression *binding; /** * Offset specified via GL_ARB_shader_atomic_counter's "offset" @@ -602,14 +622,14 @@ struct ast_type_qualifier { * \note * This field is only valid if \c explicit_offset is set. */ - int offset; + ast_expression *offset; /** * Local size specified via GL_ARB_compute_shader's "local_size_{x,y,z}" * layout qualifier. Element i of this array is only valid if * flags.q.local_size & (1 << i) is set. 
*/ - int local_size[3]; + ast_layout_expression *local_size[3]; /** Tessellation evaluation shader: vertex spacing (equal, fractional even/odd) */ GLenum vertex_spacing; @@ -621,7 +641,7 @@ struct ast_type_qualifier { bool point_mode; /** Tessellation control shader: number of output vertices */ - int vertices; + ast_layout_expression *vertices; /** * Image format specified with an ARB_shader_image_load_store @@ -752,7 +772,7 @@ public: class ast_fully_specified_type : public ast_node { public: virtual void print(void) const; - bool has_qualifiers() const; + bool has_qualifiers(_mesa_glsl_parse_state *state) const; ast_fully_specified_type() : qualifier(), specifier(NULL) { @@ -1093,17 +1113,13 @@ public: class ast_tcs_output_layout : public ast_node { public: - ast_tcs_output_layout(const struct YYLTYPE &locp, int vertices) - : vertices(vertices) + ast_tcs_output_layout(const struct YYLTYPE &locp) { set_location(locp); } virtual ir_rvalue *hir(exec_list *instructions, struct _mesa_glsl_parse_state *state); - -private: - const int vertices; }; @@ -1135,9 +1151,12 @@ private: class ast_cs_input_layout : public ast_node { public: - ast_cs_input_layout(const struct YYLTYPE &locp, const unsigned *local_size) + ast_cs_input_layout(const struct YYLTYPE &locp, + ast_layout_expression **local_size) { - memcpy(this->local_size, local_size, sizeof(this->local_size)); + for (int i = 0; i < 3; i++) { + this->local_size[i] = local_size[i]; + } set_location(locp); } @@ -1145,7 +1164,7 @@ public: struct _mesa_glsl_parse_state *state); private: - unsigned local_size[3]; + ast_layout_expression *local_size[3]; }; /*@}*/ diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 65db2618895..52881a4da7a 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -2491,7 +2491,7 @@ validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state, "uniform block layout qualifiers row_major and " "column_major may not be applied to variables " "outside of uniform blocks"); - } else if (!type->is_matrix()) { + } else if (!type->without_array()->is_matrix()) { /* The OpenGL ES 3.0 conformance tests did not originally allow * matrix layout qualifiers on non-matrices. However, the OpenGL * 4.4 and OpenGL ES 3.0 (revision TBD) specifications were @@ -2502,39 +2502,88 @@ validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state, "uniform block layout qualifiers row_major and " "column_major applied to non-matrix types may " "be rejected by older compilers"); - } else if (type->is_record()) { - /* We allow 'layout(row_major)' on structure types because it's the only - * way to get row-major layouts on matrices contained in structures. 
- */ - _mesa_glsl_warning(loc, state, - "uniform block layout qualifiers row_major and " - "column_major applied to structure types is not " - "strictly conformant and may be rejected by other " - "compilers"); } } static bool -validate_binding_qualifier(struct _mesa_glsl_parse_state *state, +process_qualifier_constant(struct _mesa_glsl_parse_state *state, YYLTYPE *loc, - const glsl_type *type, - const ast_type_qualifier *qual) + const char *qual_indentifier, + ast_expression *const_expression, + unsigned *value) +{ + exec_list dummy_instructions; + + if (const_expression == NULL) { + *value = 0; + return true; + } + + ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state); + + ir_constant *const const_int = ir->constant_expression_value(); + if (const_int == NULL || !const_int->type->is_integer()) { + _mesa_glsl_error(loc, state, "%s must be an integral constant " + "expression", qual_indentifier); + return false; + } + + if (const_int->value.i[0] < 0) { + _mesa_glsl_error(loc, state, "%s layout qualifier is invalid (%d < 0)", + qual_indentifier, const_int->value.u[0]); + return false; + } + + /* If the location is const (and we've verified that + * it is) then no instructions should have been emitted + * when we converted it to HIR. If they were emitted, + * then either the location isn't const after all, or + * we are emitting unnecessary instructions. + */ + assert(dummy_instructions.is_empty()); + + *value = const_int->value.u[0]; + return true; +} + +static bool +validate_stream_qualifier(YYLTYPE *loc, struct _mesa_glsl_parse_state *state, + unsigned stream) +{ + if (stream >= state->ctx->Const.MaxVertexStreams) { + _mesa_glsl_error(loc, state, + "invalid stream specified %d is larger than " + "MAX_VERTEX_STREAMS - 1 (%d).", + stream, state->ctx->Const.MaxVertexStreams - 1); + return false; + } + + return true; +} + +static void +apply_explicit_binding(struct _mesa_glsl_parse_state *state, + YYLTYPE *loc, + ir_variable *var, + const glsl_type *type, + const ast_type_qualifier *qual) { if (!qual->flags.q.uniform && !qual->flags.q.buffer) { _mesa_glsl_error(loc, state, "the \"binding\" qualifier only applies to uniforms and " "shader storage buffer objects"); - return false; + return; } - if (qual->binding < 0) { - _mesa_glsl_error(loc, state, "binding values must be >= 0"); - return false; + unsigned qual_binding; + if (!process_qualifier_constant(state, loc, "binding", qual->binding, + &qual_binding)) { + return; } const struct gl_context *const ctx = state->ctx; unsigned elements = type->is_array() ? type->arrays_of_arrays_size() : 1; - unsigned max_index = qual->binding + elements - 1; + unsigned max_index = qual_binding + elements - 1; const glsl_type *base_type = type->without_array(); if (base_type->is_interface()) { @@ -2550,11 +2599,11 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, */ if (qual->flags.q.uniform && max_index >= ctx->Const.MaxUniformBufferBindings) { - _mesa_glsl_error(loc, state, "layout(binding = %d) for %d UBOs exceeds " + _mesa_glsl_error(loc, state, "layout(binding = %u) for %d UBOs exceeds " "the maximum number of UBO binding points (%d)", - qual->binding, elements, + qual_binding, elements, ctx->Const.MaxUniformBufferBindings); - return false; + return; } /* SSBOs. 
From page 67 of the GLSL 4.30 specification: @@ -2568,11 +2617,11 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, */ if (qual->flags.q.buffer && max_index >= ctx->Const.MaxShaderStorageBufferBindings) { - _mesa_glsl_error(loc, state, "layout(binding = %d) for %d SSBOs exceeds " + _mesa_glsl_error(loc, state, "layout(binding = %u) for %d SSBOs exceeds " "the maximum number of SSBO binding points (%d)", - qual->binding, elements, + qual_binding, elements, ctx->Const.MaxShaderStorageBufferBindings); - return false; + return; } } else if (base_type->is_sampler()) { /* Samplers. From page 63 of the GLSL 4.20 specification: @@ -2587,19 +2636,19 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, if (max_index >= limit) { _mesa_glsl_error(loc, state, "layout(binding = %d) for %d samplers " "exceeds the maximum number of texture image units " - "(%d)", qual->binding, elements, limit); + "(%u)", qual_binding, elements, limit); - return false; + return; } } else if (base_type->contains_atomic()) { assert(ctx->Const.MaxAtomicBufferBindings <= MAX_COMBINED_ATOMIC_BUFFERS); - if (unsigned(qual->binding) >= ctx->Const.MaxAtomicBufferBindings) { + if (qual_binding >= ctx->Const.MaxAtomicBufferBindings) { _mesa_glsl_error(loc, state, "layout(binding = %d) exceeds the " " maximum number of atomic counter buffer bindings" - "(%d)", qual->binding, + "(%u)", qual_binding, ctx->Const.MaxAtomicBufferBindings); - return false; + return; } } else if (state->is_version(420, 310) && base_type->is_image()) { assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS); @@ -2607,17 +2656,20 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state, _mesa_glsl_error(loc, state, "Image binding %d exceeds the " " maximum number of image units (%d)", max_index, ctx->Const.MaxImageUnits); - return false; + return; } } else { _mesa_glsl_error(loc, state, "the \"binding\" qualifier only applies to uniform " "blocks, opaque variables, or arrays thereof"); - return false; + return; } - return true; + var->data.explicit_binding = true; + var->data.binding = qual_binding; + + return; } @@ -2660,20 +2712,26 @@ interpret_interpolation_qualifier(const struct ast_type_qualifier *qual, static void -validate_explicit_location(const struct ast_type_qualifier *qual, - ir_variable *var, - struct _mesa_glsl_parse_state *state, - YYLTYPE *loc) +apply_explicit_location(const struct ast_type_qualifier *qual, + ir_variable *var, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) { bool fail = false; + unsigned qual_location; + if (!process_qualifier_constant(state, loc, "location", qual->location, + &qual_location)) { + return; + } + /* Checks for GL_ARB_explicit_uniform_location. 
*/ if (qual->flags.q.uniform) { if (!state->check_explicit_uniform_location_allowed(loc, var)) return; const struct gl_context *const ctx = state->ctx; - unsigned max_loc = qual->location + var->type->uniform_locations() - 1; + unsigned max_loc = qual_location + var->type->uniform_locations() - 1; if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) { _mesa_glsl_error(loc, state, "location(s) consumed by uniform %s " @@ -2683,7 +2741,7 @@ validate_explicit_location(const struct ast_type_qualifier *qual, } var->data.explicit_location = true; - var->data.location = qual->location; + var->data.location = qual_location; return; } @@ -2768,30 +2826,40 @@ validate_explicit_location(const struct ast_type_qualifier *qual, switch (state->stage) { case MESA_SHADER_VERTEX: var->data.location = (var->data.mode == ir_var_shader_in) - ? (qual->location + VERT_ATTRIB_GENERIC0) - : (qual->location + VARYING_SLOT_VAR0); + ? (qual_location + VERT_ATTRIB_GENERIC0) + : (qual_location + VARYING_SLOT_VAR0); break; case MESA_SHADER_TESS_CTRL: case MESA_SHADER_TESS_EVAL: case MESA_SHADER_GEOMETRY: if (var->data.patch) - var->data.location = qual->location + VARYING_SLOT_PATCH0; + var->data.location = qual_location + VARYING_SLOT_PATCH0; else - var->data.location = qual->location + VARYING_SLOT_VAR0; + var->data.location = qual_location + VARYING_SLOT_VAR0; break; case MESA_SHADER_FRAGMENT: var->data.location = (var->data.mode == ir_var_shader_out) - ? (qual->location + FRAG_RESULT_DATA0) - : (qual->location + VARYING_SLOT_VAR0); + ? (qual_location + FRAG_RESULT_DATA0) + : (qual_location + VARYING_SLOT_VAR0); break; case MESA_SHADER_COMPUTE: assert(!"Unexpected shader type"); break; } - if (qual->flags.q.explicit_index) { + /* Check if index was set for the uniform instead of the function */ + if (qual->flags.q.explicit_index && qual->flags.q.subroutine) { + _mesa_glsl_error(loc, state, "an index qualifier can only be " + "used with subroutine functions"); + return; + } + + unsigned qual_index; + if (qual->flags.q.explicit_index && + process_qualifier_constant(state, loc, "index", qual->index, + &qual_index)) { /* From the GLSL 4.30 specification, section 4.4.2 (Output * Layout Qualifiers): * @@ -2801,12 +2869,12 @@ validate_explicit_location(const struct ast_type_qualifier *qual, * Older specifications don't mandate a behavior; we take * this as a clarification and always generate the error. */ - if (qual->index < 0 || qual->index > 1) { + if (qual_index > 1) { _mesa_glsl_error(loc, state, "explicit index may only be 0 or 1"); } else { var->data.explicit_index = true; - var->data.index = qual->index; + var->data.index = qual_index; } } } @@ -2939,6 +3007,221 @@ validate_array_dimensions(const glsl_type *t, } static void +apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual, + ir_variable *var, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) +{ + if (var->name != NULL && strcmp(var->name, "gl_FragCoord") == 0) { + + /* Section 4.3.8.1, page 39 of GLSL 1.50 spec says: + * + * "Within any shader, the first redeclarations of gl_FragCoord + * must appear before any use of gl_FragCoord." + * + * Generate a compiler error if above condition is not met by the + * fragment shader. 
+ */ + ir_variable *earlier = state->symbols->get_variable("gl_FragCoord"); + if (earlier != NULL && + earlier->data.used && + !state->fs_redeclares_gl_fragcoord) { + _mesa_glsl_error(loc, state, + "gl_FragCoord used before its first redeclaration " + "in fragment shader"); + } + + /* Make sure all gl_FragCoord redeclarations specify the same layout + * qualifiers. + */ + if (is_conflicting_fragcoord_redeclaration(state, qual)) { + const char *const qual_string = + get_layout_qualifier_string(qual->flags.q.origin_upper_left, + qual->flags.q.pixel_center_integer); + + const char *const state_string = + get_layout_qualifier_string(state->fs_origin_upper_left, + state->fs_pixel_center_integer); + + _mesa_glsl_error(loc, state, + "gl_FragCoord redeclared with different layout " + "qualifiers (%s) and (%s) ", + state_string, + qual_string); + } + state->fs_origin_upper_left = qual->flags.q.origin_upper_left; + state->fs_pixel_center_integer = qual->flags.q.pixel_center_integer; + state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers = + !qual->flags.q.origin_upper_left && !qual->flags.q.pixel_center_integer; + state->fs_redeclares_gl_fragcoord = + state->fs_origin_upper_left || + state->fs_pixel_center_integer || + state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers; + } + + var->data.pixel_center_integer = qual->flags.q.pixel_center_integer; + var->data.origin_upper_left = qual->flags.q.origin_upper_left; + if ((qual->flags.q.origin_upper_left || qual->flags.q.pixel_center_integer) + && (strcmp(var->name, "gl_FragCoord") != 0)) { + const char *const qual_string = (qual->flags.q.origin_upper_left) + ? "origin_upper_left" : "pixel_center_integer"; + + _mesa_glsl_error(loc, state, + "layout qualifier `%s' can only be applied to " + "fragment shader input `gl_FragCoord'", + qual_string); + } + + if (qual->flags.q.explicit_location) { + apply_explicit_location(qual, var, state, loc); + } else if (qual->flags.q.explicit_index) { + if (!qual->flags.q.subroutine_def) + _mesa_glsl_error(loc, state, + "explicit index requires explicit location"); + } + + if (qual->flags.q.explicit_binding) { + apply_explicit_binding(state, loc, var, var->type, qual); + } + + if (state->stage == MESA_SHADER_GEOMETRY && + qual->flags.q.out && qual->flags.q.stream) { + unsigned qual_stream; + if (process_qualifier_constant(state, loc, "stream", qual->stream, + &qual_stream) && + validate_stream_qualifier(loc, state, qual_stream)) { + var->data.stream = qual_stream; + } + } + + if (var->type->contains_atomic()) { + if (var->data.mode == ir_var_uniform) { + if (var->data.explicit_binding) { + unsigned *offset = + &state->atomic_counter_offsets[var->data.binding]; + + if (*offset % ATOMIC_COUNTER_SIZE) + _mesa_glsl_error(loc, state, + "misaligned atomic counter offset"); + + var->data.atomic.offset = *offset; + *offset += var->type->atomic_size(); + + } else { + _mesa_glsl_error(loc, state, + "atomic counters require explicit binding point"); + } + } else if (var->data.mode != ir_var_function_in) { + _mesa_glsl_error(loc, state, "atomic counters may only be declared as " + "function parameters or uniform-qualified " + "global variables"); + } + } + + /* Is the 'layout' keyword used with parameters that allow relaxed checking. + * Many implementations of GL_ARB_fragment_coord_conventions_enable and some + * implementations (only Mesa?) GL_ARB_explicit_attrib_location_enable + * allowed the layout qualifier to be used with 'varying' and 'attribute'. 
+ * These extensions and all following extensions that add the 'layout' + * keyword have been modified to require the use of 'in' or 'out'. + * + * The following extension do not allow the deprecated keywords: + * + * GL_AMD_conservative_depth + * GL_ARB_conservative_depth + * GL_ARB_gpu_shader5 + * GL_ARB_separate_shader_objects + * GL_ARB_tessellation_shader + * GL_ARB_transform_feedback3 + * GL_ARB_uniform_buffer_object + * + * It is unknown whether GL_EXT_shader_image_load_store or GL_NV_gpu_shader5 + * allow layout with the deprecated keywords. + */ + const bool relaxed_layout_qualifier_checking = + state->ARB_fragment_coord_conventions_enable; + + const bool uses_deprecated_qualifier = qual->flags.q.attribute + || qual->flags.q.varying; + if (qual->has_layout() && uses_deprecated_qualifier) { + if (relaxed_layout_qualifier_checking) { + _mesa_glsl_warning(loc, state, + "`layout' qualifier may not be used with " + "`attribute' or `varying'"); + } else { + _mesa_glsl_error(loc, state, + "`layout' qualifier may not be used with " + "`attribute' or `varying'"); + } + } + + /* Layout qualifiers for gl_FragDepth, which are enabled by extension + * AMD_conservative_depth. + */ + int depth_layout_count = qual->flags.q.depth_any + + qual->flags.q.depth_greater + + qual->flags.q.depth_less + + qual->flags.q.depth_unchanged; + if (depth_layout_count > 0 + && !state->AMD_conservative_depth_enable + && !state->ARB_conservative_depth_enable) { + _mesa_glsl_error(loc, state, + "extension GL_AMD_conservative_depth or " + "GL_ARB_conservative_depth must be enabled " + "to use depth layout qualifiers"); + } else if (depth_layout_count > 0 + && strcmp(var->name, "gl_FragDepth") != 0) { + _mesa_glsl_error(loc, state, + "depth layout qualifiers can be applied only to " + "gl_FragDepth"); + } else if (depth_layout_count > 1 + && strcmp(var->name, "gl_FragDepth") == 0) { + _mesa_glsl_error(loc, state, + "at most one depth layout qualifier can be applied to " + "gl_FragDepth"); + } + if (qual->flags.q.depth_any) + var->data.depth_layout = ir_depth_layout_any; + else if (qual->flags.q.depth_greater) + var->data.depth_layout = ir_depth_layout_greater; + else if (qual->flags.q.depth_less) + var->data.depth_layout = ir_depth_layout_less; + else if (qual->flags.q.depth_unchanged) + var->data.depth_layout = ir_depth_layout_unchanged; + else + var->data.depth_layout = ir_depth_layout_none; + + if (qual->flags.q.std140 || + qual->flags.q.std430 || + qual->flags.q.packed || + qual->flags.q.shared) { + _mesa_glsl_error(loc, state, + "uniform and shader storage block layout qualifiers " + "std140, std430, packed, and shared can only be " + "applied to uniform or shader storage blocks, not " + "members"); + } + + if (qual->flags.q.row_major || qual->flags.q.column_major) { + validate_matrix_layout_for_type(state, loc, var->type, var); + } + + /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader + * Inputs): + * + * "Fragment shaders also allow the following layout qualifier on in only + * (not with variable declarations) + * layout-qualifier-id + * early_fragment_tests + * [...]" + */ + if (qual->flags.q.early_fragment_tests) { + _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only " + "valid in fragment shader input layout declaration."); + } +} + +static void apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, ir_variable *var, struct _mesa_glsl_parse_state *state, @@ -2992,11 +3275,6 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, 
select_gles_precision(qual->precision, var->type, state, loc); } - if (state->stage == MESA_SHADER_GEOMETRY && - qual->flags.q.out && qual->flags.q.stream) { - var->data.stream = qual->stream; - } - if (qual->flags.q.patch) var->data.patch = 1; @@ -3136,102 +3414,6 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, interpret_interpolation_qualifier(qual, (ir_variable_mode) var->data.mode, state, loc); - var->data.pixel_center_integer = qual->flags.q.pixel_center_integer; - var->data.origin_upper_left = qual->flags.q.origin_upper_left; - if ((qual->flags.q.origin_upper_left || qual->flags.q.pixel_center_integer) - && (strcmp(var->name, "gl_FragCoord") != 0)) { - const char *const qual_string = (qual->flags.q.origin_upper_left) - ? "origin_upper_left" : "pixel_center_integer"; - - _mesa_glsl_error(loc, state, - "layout qualifier `%s' can only be applied to " - "fragment shader input `gl_FragCoord'", - qual_string); - } - - if (var->name != NULL && strcmp(var->name, "gl_FragCoord") == 0) { - - /* Section 4.3.8.1, page 39 of GLSL 1.50 spec says: - * - * "Within any shader, the first redeclarations of gl_FragCoord - * must appear before any use of gl_FragCoord." - * - * Generate a compiler error if above condition is not met by the - * fragment shader. - */ - ir_variable *earlier = state->symbols->get_variable("gl_FragCoord"); - if (earlier != NULL && - earlier->data.used && - !state->fs_redeclares_gl_fragcoord) { - _mesa_glsl_error(loc, state, - "gl_FragCoord used before its first redeclaration " - "in fragment shader"); - } - - /* Make sure all gl_FragCoord redeclarations specify the same layout - * qualifiers. - */ - if (is_conflicting_fragcoord_redeclaration(state, qual)) { - const char *const qual_string = - get_layout_qualifier_string(qual->flags.q.origin_upper_left, - qual->flags.q.pixel_center_integer); - - const char *const state_string = - get_layout_qualifier_string(state->fs_origin_upper_left, - state->fs_pixel_center_integer); - - _mesa_glsl_error(loc, state, - "gl_FragCoord redeclared with different layout " - "qualifiers (%s) and (%s) ", - state_string, - qual_string); - } - state->fs_origin_upper_left = qual->flags.q.origin_upper_left; - state->fs_pixel_center_integer = qual->flags.q.pixel_center_integer; - state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers = - !qual->flags.q.origin_upper_left && !qual->flags.q.pixel_center_integer; - state->fs_redeclares_gl_fragcoord = - state->fs_origin_upper_left || - state->fs_pixel_center_integer || - state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers; - } - - if (qual->flags.q.explicit_location) { - validate_explicit_location(qual, var, state, loc); - } else if (qual->flags.q.explicit_index) { - _mesa_glsl_error(loc, state, "explicit index requires explicit location"); - } - - if (qual->flags.q.explicit_binding && - validate_binding_qualifier(state, loc, var->type, qual)) { - var->data.explicit_binding = true; - var->data.binding = qual->binding; - } - - if (var->type->contains_atomic()) { - if (var->data.mode == ir_var_uniform) { - if (var->data.explicit_binding) { - unsigned *offset = - &state->atomic_counter_offsets[var->data.binding]; - - if (*offset % ATOMIC_COUNTER_SIZE) - _mesa_glsl_error(loc, state, - "misaligned atomic counter offset"); - - var->data.atomic.offset = *offset; - *offset += var->type->atomic_size(); - - } else { - _mesa_glsl_error(loc, state, - "atomic counters require explicit binding point"); - } - } else if (var->data.mode != ir_var_function_in) { - _mesa_glsl_error(loc, 
state, "atomic counters may only be declared as " - "function parameters or uniform-qualified " - "global variables"); - } - } - /* Does the declaration use the deprecated 'attribute' or 'varying' * keywords? */ @@ -3267,114 +3449,13 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, "`out' or `varying' variables between shader stages"); } - - /* Is the 'layout' keyword used with parameters that allow relaxed checking. - * Many implementations of GL_ARB_fragment_coord_conventions_enable and some - * implementations (only Mesa?) GL_ARB_explicit_attrib_location_enable - * allowed the layout qualifier to be used with 'varying' and 'attribute'. - * These extensions and all following extensions that add the 'layout' - * keyword have been modified to require the use of 'in' or 'out'. - * - * The following extension do not allow the deprecated keywords: - * - * GL_AMD_conservative_depth - * GL_ARB_conservative_depth - * GL_ARB_gpu_shader5 - * GL_ARB_separate_shader_objects - * GL_ARB_tessellation_shader - * GL_ARB_transform_feedback3 - * GL_ARB_uniform_buffer_object - * - * It is unknown whether GL_EXT_shader_image_load_store or GL_NV_gpu_shader5 - * allow layout with the deprecated keywords. - */ - const bool relaxed_layout_qualifier_checking = - state->ARB_fragment_coord_conventions_enable; - - if (qual->has_layout() && uses_deprecated_qualifier) { - if (relaxed_layout_qualifier_checking) { - _mesa_glsl_warning(loc, state, - "`layout' qualifier may not be used with " - "`attribute' or `varying'"); - } else { - _mesa_glsl_error(loc, state, - "`layout' qualifier may not be used with " - "`attribute' or `varying'"); - } - } - - /* Layout qualifiers for gl_FragDepth, which are enabled by extension - * AMD_conservative_depth. - */ - int depth_layout_count = qual->flags.q.depth_any - + qual->flags.q.depth_greater - + qual->flags.q.depth_less - + qual->flags.q.depth_unchanged; - if (depth_layout_count > 0 - && !state->AMD_conservative_depth_enable - && !state->ARB_conservative_depth_enable) { - _mesa_glsl_error(loc, state, - "extension GL_AMD_conservative_depth or " - "GL_ARB_conservative_depth must be enabled " - "to use depth layout qualifiers"); - } else if (depth_layout_count > 0 - && strcmp(var->name, "gl_FragDepth") != 0) { - _mesa_glsl_error(loc, state, - "depth layout qualifiers can be applied only to " - "gl_FragDepth"); - } else if (depth_layout_count > 1 - && strcmp(var->name, "gl_FragDepth") == 0) { - _mesa_glsl_error(loc, state, - "at most one depth layout qualifier can be applied to " - "gl_FragDepth"); - } - if (qual->flags.q.depth_any) - var->data.depth_layout = ir_depth_layout_any; - else if (qual->flags.q.depth_greater) - var->data.depth_layout = ir_depth_layout_greater; - else if (qual->flags.q.depth_less) - var->data.depth_layout = ir_depth_layout_less; - else if (qual->flags.q.depth_unchanged) - var->data.depth_layout = ir_depth_layout_unchanged; - else - var->data.depth_layout = ir_depth_layout_none; - - if (qual->flags.q.std140 || - qual->flags.q.std430 || - qual->flags.q.packed || - qual->flags.q.shared) { - _mesa_glsl_error(loc, state, - "uniform and shader storage block layout qualifiers " - "std140, std430, packed, and shared can only be " - "applied to uniform or shader storage blocks, not " - "members"); - } - if (qual->flags.q.shared_storage && state->stage != MESA_SHADER_COMPUTE) { _mesa_glsl_error(loc, state, "the shared storage qualifiers can only be used with " "compute shaders"); } - if (qual->flags.q.row_major || qual->flags.q.column_major) { 
- validate_matrix_layout_for_type(state, loc, var->type, var); - } - apply_image_qualifier_to_variable(qual, var, state, loc); - - /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader - * Inputs): - * - * "Fragment shaders also allow the following layout qualifier on in only - * (not with variable declarations) - * layout-qualifier-id - * early_fragment_tests - * [...]" - */ - if (qual->flags.q.early_fragment_tests) { - _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only " - "valid in fragment shader input layout declaration."); - } } /** @@ -3798,7 +3879,17 @@ handle_tess_ctrl_shader_output_decl(struct _mesa_glsl_parse_state *state, unsigned num_vertices = 0; if (state->tcs_output_vertices_specified) { - num_vertices = state->out_qualifier->vertices; + if (!state->out_qualifier->vertices-> + process_qualifier_constant(state, "vertices", + &num_vertices, false)) { + return; + } + + if (num_vertices > state->Const.MaxPatchVertices) { + _mesa_glsl_error(&loc, state, "vertices (%d) exceeds " + "GL_MAX_PATCH_VERTICES", num_vertices); + return; + } } if (!var->type->is_array() && !var->data.patch) { @@ -4032,9 +4123,18 @@ ast_declarator_list::hir(exec_list *instructions, */ if (decl_type && decl_type->contains_atomic()) { if (type->qualifier.flags.q.explicit_binding && - type->qualifier.flags.q.explicit_offset) - state->atomic_counter_offsets[type->qualifier.binding] = - type->qualifier.offset; + type->qualifier.flags.q.explicit_offset) { + unsigned qual_binding; + unsigned qual_offset; + if (process_qualifier_constant(state, &loc, "binding", + type->qualifier.binding, + &qual_binding) + && process_qualifier_constant(state, &loc, "offset", + type->qualifier.offset, + &qual_offset)) { + state->atomic_counter_offsets[qual_binding] = qual_offset; + } + } } if (this->declarations.is_empty()) { @@ -4188,6 +4288,8 @@ ast_declarator_list::hir(exec_list *instructions, apply_type_qualifier_to_variable(& this->type->qualifier, var, state, & loc, false); + apply_layout_qualifier_to_variable(&this->type->qualifier, var, state, + &loc); if (this->type->qualifier.flags.q.invariant) { if (!is_varying_var(var, state->stage)) { @@ -4983,7 +5085,7 @@ ast_function::hir(exec_list *instructions, /* From page 56 (page 62 of the PDF) of the GLSL 1.30 spec: * "No qualifier is allowed on the return type of a function." 
*/ - if (this->return_type->has_qualifiers()) { + if (this->return_type->has_qualifiers(state)) { YYLTYPE loc = this->get_location(); _mesa_glsl_error(& loc, state, "function `%s' return type has qualifiers", name); @@ -5115,6 +5217,27 @@ ast_function::hir(exec_list *instructions, if (this->return_type->qualifier.flags.q.subroutine_def) { int idx; + if (this->return_type->qualifier.flags.q.explicit_index) { + unsigned qual_index; + if (process_qualifier_constant(state, &loc, "index", + this->return_type->qualifier.index, + &qual_index)) { + if (!state->has_explicit_uniform_location()) { + _mesa_glsl_error(&loc, state, "subroutine index requires " + "GL_ARB_explicit_uniform_location or " + "GLSL 4.30"); + } else if (qual_index >= MAX_SUBROUTINES) { + _mesa_glsl_error(&loc, state, + "invalid subroutine index (%d) index must " + "be a number between 0 and " + "GL_MAX_SUBROUTINES - 1 (%d)", qual_index, + MAX_SUBROUTINES - 1); + } else { + f->subroutine_index = qual_index; + } + } + } + f->num_subroutine_types = this->return_type->qualifier.subroutine_list->declarations.length(); f->subroutine_types = ralloc_array(state, const struct glsl_type *, f->num_subroutine_types); @@ -6046,27 +6169,19 @@ ast_type_specifier::hir(exec_list *instructions, * stored in \c *fields_ret. */ unsigned -ast_process_structure_or_interface_block(exec_list *instructions, - struct _mesa_glsl_parse_state *state, - exec_list *declarations, - YYLTYPE &loc, - glsl_struct_field **fields_ret, - bool is_interface, - enum glsl_matrix_layout matrix_layout, - bool allow_reserved_names, - ir_variable_mode var_mode, - ast_type_qualifier *layout) +ast_process_struct_or_iface_block_members(exec_list *instructions, + struct _mesa_glsl_parse_state *state, + exec_list *declarations, + glsl_struct_field **fields_ret, + bool is_interface, + enum glsl_matrix_layout matrix_layout, + bool allow_reserved_names, + ir_variable_mode var_mode, + ast_type_qualifier *layout, + unsigned block_stream) { unsigned decl_count = 0; - /* For blocks that accept memory qualifiers (i.e. shader storage), verify - * that we don't have incompatible qualifiers - */ - if (layout && layout->flags.q.read_only && layout->flags.q.write_only) { - _mesa_glsl_error(&loc, state, - "Interface block sets both readonly and writeonly"); - } - /* Make an initial pass over the list of fields to determine how * many there are. Each element in this list is an ast_declarator_list. * This means that we actually need to count the number of elements in the @@ -6087,6 +6202,7 @@ ast_process_structure_or_interface_block(exec_list *instructions, unsigned i = 0; foreach_list_typed (ast_declarator_list, decl_list, link, declarations) { const char *type_name; + YYLTYPE loc = decl_list->get_location(); decl_list->type->specifier->hir(instructions, state); @@ -6101,74 +6217,120 @@ ast_process_structure_or_interface_block(exec_list *instructions, const glsl_type *decl_type = decl_list->type->glsl_type(& type_name, state); - foreach_list_typed (ast_declaration, decl, link, - &decl_list->declarations) { - if (!allow_reserved_names) - validate_identifier(decl->identifier, loc, state); + const struct ast_type_qualifier *const qual = + &decl_list->type->qualifier; - /* From section 4.3.9 of the GLSL 4.40 spec: - * - * "[In interface blocks] opaque types are not allowed." + /* From section 4.3.9 of the GLSL 4.40 spec: + * + * "[In interface blocks] opaque types are not allowed." + * + * It should be impossible for decl_type to be NULL here. 
Cases that + * might naturally lead to decl_type being NULL, especially for the + * is_interface case, will have resulted in compilation having + * already halted due to a syntax error. + */ + assert(decl_type); + + if (is_interface && decl_type->contains_opaque()) { + _mesa_glsl_error(&loc, state, + "uniform/buffer in non-default interface block contains " + "opaque variable"); + } + + if (decl_type->contains_atomic()) { + /* From section 4.1.7.3 of the GLSL 4.40 spec: * - * It should be impossible for decl_type to be NULL here. Cases that - * might naturally lead to decl_type being NULL, especially for the - * is_interface case, will have resulted in compilation having - * already halted due to a syntax error. + * "Members of structures cannot be declared as atomic counter + * types." */ - assert(decl_type); + _mesa_glsl_error(&loc, state, "atomic counter in structure, " + "shader storage block or uniform block"); + } - if (is_interface && decl_type->contains_opaque()) { - YYLTYPE loc = decl_list->get_location(); - _mesa_glsl_error(&loc, state, - "uniform/buffer in non-default interface block contains " - "opaque variable"); - } + if (decl_type->contains_image()) { + /* FINISHME: Same problem as with atomic counters. + * FINISHME: Request clarification from Khronos and add + * FINISHME: spec quotation here. + */ + _mesa_glsl_error(&loc, state, + "image in structure, shader storage block or " + "uniform block"); + } - if (decl_type->contains_atomic()) { - /* From section 4.1.7.3 of the GLSL 4.40 spec: - * - * "Members of structures cannot be declared as atomic counter - * types." - */ - YYLTYPE loc = decl_list->get_location(); - _mesa_glsl_error(&loc, state, "atomic counter in structure, " - "shader storage block or uniform block"); - } + if (qual->flags.q.explicit_binding) { + _mesa_glsl_error(&loc, state, + "binding layout qualifier cannot be applied " + "to struct or interface block members"); + } - if (decl_type->contains_image()) { - /* FINISHME: Same problem as with atomic counters. - * FINISHME: Request clarification from Khronos and add - * FINISHME: spec quotation here. - */ - YYLTYPE loc = decl_list->get_location(); - _mesa_glsl_error(&loc, state, - "image in structure, shader storage block or " - "uniform block"); + if (qual->flags.q.std140 || + qual->flags.q.std430 || + qual->flags.q.packed || + qual->flags.q.shared) { + _mesa_glsl_error(&loc, state, + "uniform/shader storage block layout qualifiers " + "std140, std430, packed, and shared can only be " + "applied to uniform/shader storage blocks, not " + "members"); + } + + if (qual->flags.q.constant) { + _mesa_glsl_error(&loc, state, + "const storage qualifier cannot be applied " + "to struct or interface block members"); + } + + /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec: + * + * "A block member may be declared with a stream identifier, but + * the specified stream must match the stream associated with the + * containing block." 
+ */ + if (qual->flags.q.explicit_stream) { + unsigned qual_stream; + if (process_qualifier_constant(state, &loc, "stream", + qual->stream, &qual_stream) && + qual_stream != block_stream) { + _mesa_glsl_error(&loc, state, "stream layout qualifier on " + "interface block member does not match " + "the interface block (%d vs %d)", qual->stream, + block_stream); } + } - const struct ast_type_qualifier *const qual = - & decl_list->type->qualifier; + if (qual->flags.q.uniform && qual->has_interpolation()) { + _mesa_glsl_error(&loc, state, + "interpolation qualifiers cannot be used " + "with uniform interface blocks"); + } - if (qual->flags.q.explicit_binding) - validate_binding_qualifier(state, &loc, decl_type, qual); + if ((qual->flags.q.uniform || !is_interface) && + qual->has_auxiliary_storage()) { + _mesa_glsl_error(&loc, state, + "auxiliary storage qualifiers cannot be used " + "in uniform blocks or structures."); + } - if (qual->flags.q.std140 || - qual->flags.q.std430 || - qual->flags.q.packed || - qual->flags.q.shared) { + if (qual->flags.q.row_major || qual->flags.q.column_major) { + if (!qual->flags.q.uniform && !qual->flags.q.buffer) { _mesa_glsl_error(&loc, state, - "uniform/shader storage block layout qualifiers " - "std140, std430, packed, and shared can only be " - "applied to uniform/shader storage blocks, not " - "members"); - } + "row_major and column_major can only be " + "applied to interface blocks"); + } else + validate_matrix_layout_for_type(state, &loc, decl_type, NULL); + } - if (qual->flags.q.constant) { - YYLTYPE loc = decl_list->get_location(); - _mesa_glsl_error(&loc, state, - "const storage qualifier cannot be applied " - "to struct or interface block members"); - } + if (qual->flags.q.read_only && qual->flags.q.write_only) { + _mesa_glsl_error(&loc, state, "buffer variable can't be both " + "readonly and writeonly."); + } + + foreach_list_typed (ast_declaration, decl, link, + &decl_list->declarations) { + YYLTYPE loc = decl->get_location(); + + if (!allow_reserved_names) + validate_identifier(decl->identifier, loc, state); const struct glsl_type *field_type = process_array_type(&loc, decl_type, decl->array_specifier, state); @@ -6183,42 +6345,6 @@ ast_process_structure_or_interface_block(exec_list *instructions, fields[i].patch = qual->flags.q.patch ? 1 : 0; fields[i].precision = qual->precision; - /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec: - * - * "A block member may be declared with a stream identifier, but - * the specified stream must match the stream associated with the - * containing block." 
- */ - if (qual->flags.q.explicit_stream && - qual->stream != layout->stream) { - _mesa_glsl_error(&loc, state, "stream layout qualifier on " - "interface block member `%s' does not match " - "the interface block (%d vs %d)", - fields[i].name, qual->stream, layout->stream); - } - - if (qual->flags.q.row_major || qual->flags.q.column_major) { - if (!qual->flags.q.uniform && !qual->flags.q.buffer) { - _mesa_glsl_error(&loc, state, - "row_major and column_major can only be " - "applied to interface blocks"); - } else - validate_matrix_layout_for_type(state, &loc, field_type, NULL); - } - - if (qual->flags.q.uniform && qual->has_interpolation()) { - _mesa_glsl_error(&loc, state, - "interpolation qualifiers cannot be used " - "with uniform interface blocks"); - } - - if ((qual->flags.q.uniform || !is_interface) && - qual->has_auxiliary_storage()) { - _mesa_glsl_error(&loc, state, - "auxiliary storage qualifiers cannot be used " - "in uniform blocks or structures."); - } - /* Propogate row- / column-major information down the fields of the * structure or interface block. Structures need this data because * the structure may contain a structure that contains ... a matrix @@ -6248,29 +6374,20 @@ ast_process_structure_or_interface_block(exec_list *instructions, * be defined inside shader storage buffer objects */ if (layout && var_mode == ir_var_shader_storage) { - if (qual->flags.q.read_only && qual->flags.q.write_only) { - _mesa_glsl_error(&loc, state, - "buffer variable `%s' can't be " - "readonly and writeonly.", fields[i].name); - } - /* For readonly and writeonly qualifiers the field definition, * if set, overwrites the layout qualifier. */ - bool read_only = layout->flags.q.read_only; - bool write_only = layout->flags.q.write_only; - if (qual->flags.q.read_only) { - read_only = true; - write_only = false; + fields[i].image_read_only = true; + fields[i].image_write_only = false; } else if (qual->flags.q.write_only) { - read_only = false; - write_only = true; + fields[i].image_read_only = false; + fields[i].image_write_only = true; + } else { + fields[i].image_read_only = layout->flags.q.read_only; + fields[i].image_write_only = layout->flags.q.write_only; } - fields[i].image_read_only = read_only; - fields[i].image_write_only = write_only; - /* For other qualifiers, we set the flag if either the layout * qualifier or the field qualifier are set */ @@ -6328,16 +6445,16 @@ ast_struct_specifier::hir(exec_list *instructions, glsl_struct_field *fields; unsigned decl_count = - ast_process_structure_or_interface_block(instructions, - state, - &this->declarations, - loc, - &fields, - false, - GLSL_MATRIX_LAYOUT_INHERITED, - false /* allow_reserved_names */, - ir_var_auto, - NULL); + ast_process_struct_or_iface_block_members(instructions, + state, + &this->declarations, + &fields, + false, + GLSL_MATRIX_LAYOUT_INHERITED, + false /* allow_reserved_names */, + ir_var_auto, + NULL, + 0 /* for interface only */); validate_identifier(this->name, loc, state); @@ -6483,17 +6600,36 @@ ast_interface_block::hir(exec_list *instructions, */ state->struct_specifier_depth++; + /* For blocks that accept memory qualifiers (i.e. 
shader storage), verify + * that we don't have incompatible qualifiers + */ + if (this->layout.flags.q.read_only && this->layout.flags.q.write_only) { + _mesa_glsl_error(&loc, state, + "Interface block sets both readonly and writeonly"); + } + + unsigned qual_stream; + if (!process_qualifier_constant(state, &loc, "stream", this->layout.stream, + &qual_stream) || + !validate_stream_qualifier(&loc, state, qual_stream)) { + /* If the stream qualifier is invalid it doesn't make sense to continue + * on and try to compare stream layouts on member variables against it + * so just return early. + */ + return NULL; + } + unsigned int num_variables = - ast_process_structure_or_interface_block(&declared_variables, - state, - &this->declarations, - loc, - &fields, - true, - matrix_layout, - redeclaring_per_vertex, - var_mode, - &this->layout); + ast_process_struct_or_iface_block_members(&declared_variables, + state, + &this->declarations, + &fields, + true, + matrix_layout, + redeclaring_per_vertex, + var_mode, + &this->layout, + qual_stream); state->struct_specifier_depth--; @@ -6604,6 +6740,8 @@ ast_interface_block::hir(exec_list *instructions, earlier_per_vertex->fields.structure[j].sample; fields[i].patch = earlier_per_vertex->fields.structure[j].patch; + fields[i].precision = + earlier_per_vertex->fields.structure[j].precision; } } @@ -6633,8 +6771,6 @@ ast_interface_block::hir(exec_list *instructions, num_variables, packing, this->block_name); - if (this->layout.flags.q.explicit_binding) - validate_binding_qualifier(state, &loc, block_type, &this->layout); if (!state->symbols->add_interface(block_type->name, block_type, var_mode)) { YYLTYPE loc = this->get_location(); @@ -6765,10 +6901,6 @@ ast_interface_block::hir(exec_list *instructions, "not allowed"); } - if (this->layout.flags.q.explicit_binding) - validate_binding_qualifier(state, &loc, block_array_type, - &this->layout); - var = new(state) ir_variable(block_array_type, this->instance_name, var_mode); @@ -6830,14 +6962,12 @@ ast_interface_block::hir(exec_list *instructions, earlier->reinit_interface_type(block_type); delete var; } else { - /* Propagate the "binding" keyword into this UBO's fields; - * the UBO declaration itself doesn't get an ir_variable unless it - * has an instance name. This is ugly. - */ - var->data.explicit_binding = this->layout.flags.q.explicit_binding; - var->data.binding = this->layout.binding; + if (this->layout.flags.q.explicit_binding) { + apply_explicit_binding(state, &loc, var, var->type, + &this->layout); + } - var->data.stream = this->layout.stream; + var->data.stream = qual_stream; state->symbols->add_variable(var); instructions->push_tail(var); @@ -6857,7 +6987,7 @@ ast_interface_block::hir(exec_list *instructions, var->data.centroid = fields[i].centroid; var->data.sample = fields[i].sample; var->data.patch = fields[i].patch; - var->data.stream = this->layout.stream; + var->data.stream = qual_stream; var->init_interface_type(block_type); if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform) @@ -6914,8 +7044,10 @@ ast_interface_block::hir(exec_list *instructions, * The UBO declaration itself doesn't get an ir_variable unless it * has an instance name. This is ugly. 
*/ - var->data.explicit_binding = this->layout.flags.q.explicit_binding; - var->data.binding = this->layout.binding; + if (this->layout.flags.q.explicit_binding) { + apply_explicit_binding(state, &loc, var, + var->get_interface_type(), &this->layout); + } if (var->type->is_unsized_array()) { if (var->is_in_shader_storage_block()) { @@ -6997,22 +7129,18 @@ ast_tcs_output_layout::hir(exec_list *instructions, { YYLTYPE loc = this->get_location(); - /* If any tessellation control output layout declaration preceded this - * one, make sure it was consistent with this one. - */ - if (state->tcs_output_vertices_specified && - state->out_qualifier->vertices != this->vertices) { - _mesa_glsl_error(&loc, state, - "tessellation control shader output layout does not " - "match previous declaration"); - return NULL; + unsigned num_vertices; + if (!state->out_qualifier->vertices-> + process_qualifier_constant(state, "vertices", &num_vertices, + false)) { + /* return here to stop cascading incorrect error messages */ + return NULL; } /* If any shader outputs occurred before this declaration and specified an * array size, make sure the size they specified is consistent with the * primitive type. */ - unsigned num_vertices = this->vertices; if (state->tcs_output_size != 0 && state->tcs_output_size != num_vertices) { _mesa_glsl_error(&loc, state, "this tessellation control shader output layout " @@ -7120,20 +7248,6 @@ ast_cs_input_layout::hir(exec_list *instructions, { YYLTYPE loc = this->get_location(); - /* If any compute input layout declaration preceded this one, make sure it - * was consistent with this one. - */ - if (state->cs_input_local_size_specified) { - for (int i = 0; i < 3; i++) { - if (state->cs_input_local_size[i] != this->local_size[i]) { - _mesa_glsl_error(&loc, state, - "compute shader input layout does not match" - " previous declaration"); - return NULL; - } - } - } - /* From the ARB_compute_shader specification: * * If the local size of the shader in any dimension is greater @@ -7146,15 +7260,30 @@ ast_cs_input_layout::hir(exec_list *instructions, * report it at compile time as well. */ GLuint64 total_invocations = 1; + unsigned qual_local_size[3]; for (int i = 0; i < 3; i++) { - if (this->local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) { + + char *local_size_str = ralloc_asprintf(NULL, "invalid local_size_%c", + 'x' + i); + /* Infer a local_size of 1 for unspecified dimensions */ + if (this->local_size[i] == NULL) { + qual_local_size[i] = 1; + } else if (!this->local_size[i]-> + process_qualifier_constant(state, local_size_str, + &qual_local_size[i], false)) { + ralloc_free(local_size_str); + return NULL; + } + ralloc_free(local_size_str); + + if (qual_local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) { _mesa_glsl_error(&loc, state, "local_size_%c exceeds MAX_COMPUTE_WORK_GROUP_SIZE" " (%d)", 'x' + i, state->ctx->Const.MaxComputeWorkGroupSize[i]); break; } - total_invocations *= this->local_size[i]; + total_invocations *= qual_local_size[i]; if (total_invocations > state->ctx->Const.MaxComputeWorkGroupInvocations) { _mesa_glsl_error(&loc, state, @@ -7165,9 +7294,23 @@ ast_cs_input_layout::hir(exec_list *instructions, } } + /* If any compute input layout declaration preceded this one, make sure it + * was consistent with this one. 
+ */ + if (state->cs_input_local_size_specified) { + for (int i = 0; i < 3; i++) { + if (state->cs_input_local_size[i] != qual_local_size[i]) { + _mesa_glsl_error(&loc, state, + "compute shader input layout does not match" + " previous declaration"); + return NULL; + } + } + } + state->cs_input_local_size_specified = true; for (int i = 0; i < 3; i++) - state->cs_input_local_size[i] = this->local_size[i]; + state->cs_input_local_size[i] = qual_local_size[i]; /* We may now declare the built-in constant gl_WorkGroupSize (see * builtin_variable_generator::generate_constants() for why we didn't @@ -7182,7 +7325,7 @@ ast_cs_input_layout::hir(exec_list *instructions, ir_constant_data data; memset(&data, 0, sizeof(data)); for (int i = 0; i < 3; i++) - data.u[i] = this->local_size[i]; + data.u[i] = qual_local_size[i]; var->constant_value = new(var) ir_constant(glsl_type::uvec3_type, &data); var->constant_initializer = new(var) ir_constant(glsl_type::uvec3_type, &data); @@ -7198,6 +7341,8 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, { bool gl_FragColor_assigned = false; bool gl_FragData_assigned = false; + bool gl_FragSecondaryColor_assigned = false; + bool gl_FragSecondaryData_assigned = false; bool user_defined_fs_output_assigned = false; ir_variable *user_defined_fs_output = NULL; @@ -7215,6 +7360,10 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, gl_FragColor_assigned = true; else if (strcmp(var->name, "gl_FragData") == 0) gl_FragData_assigned = true; + else if (strcmp(var->name, "gl_SecondaryFragColorEXT") == 0) + gl_FragSecondaryColor_assigned = true; + else if (strcmp(var->name, "gl_SecondaryFragDataEXT") == 0) + gl_FragSecondaryData_assigned = true; else if (!is_gl_identifier(var->name)) { if (state->stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out) { @@ -7246,11 +7395,29 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, _mesa_glsl_error(&loc, state, "fragment shader writes to both " "`gl_FragColor' and `%s'", user_defined_fs_output->name); + } else if (gl_FragSecondaryColor_assigned && gl_FragSecondaryData_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragSecondaryColorEXT' and" + " `gl_FragSecondaryDataEXT'"); + } else if (gl_FragColor_assigned && gl_FragSecondaryData_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragColor' and" + " `gl_FragSecondaryDataEXT'"); + } else if (gl_FragData_assigned && gl_FragSecondaryColor_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragData' and" + " `gl_FragSecondaryColorEXT'"); } else if (gl_FragData_assigned && user_defined_fs_output_assigned) { _mesa_glsl_error(&loc, state, "fragment shader writes to both " "`gl_FragData' and `%s'", user_defined_fs_output->name); } + + if ((gl_FragSecondaryColor_assigned || gl_FragSecondaryData_assigned) && + !state->EXT_blend_func_extended_enable) { + _mesa_glsl_error(&loc, state, + "Dual source blending requires EXT_blend_func_extended"); + } } diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp index 79134c19893..03ed4dcfa2a 100644 --- a/src/glsl/ast_type.cpp +++ b/src/glsl/ast_type.cpp @@ -38,13 +38,16 @@ ast_type_specifier::print(void) const } bool -ast_fully_specified_type::has_qualifiers() const +ast_fully_specified_type::has_qualifiers(_mesa_glsl_parse_state *state) const { /* 'subroutine' isnt a real qualifier. 
*/ ast_type_qualifier subroutine_only; subroutine_only.flags.i = 0; subroutine_only.flags.q.subroutine = 1; subroutine_only.flags.q.subroutine_def = 1; + if (state->has_explicit_uniform_location()) { + subroutine_only.flags.q.explicit_index = 1; + } return (this->qualifier.flags.i & ~subroutine_only.flags.i) != 0; } @@ -169,41 +172,32 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, } if (q.flags.q.max_vertices) { - if (this->flags.q.max_vertices && this->max_vertices != q.max_vertices) { + if (this->max_vertices) { + this->max_vertices->merge_qualifier(q.max_vertices); + } else { + this->max_vertices = q.max_vertices; + } + } + + if (q.flags.q.subroutine_def) { + if (this->flags.q.subroutine_def) { _mesa_glsl_error(loc, state, - "geometry shader set conflicting max_vertices " - "(%d and %d)", this->max_vertices, q.max_vertices); - return false; + "conflicting subroutine qualifiers used"); + } else { + this->subroutine_list = q.subroutine_list; } - this->max_vertices = q.max_vertices; } if (q.flags.q.invocations) { - if (this->flags.q.invocations && this->invocations != q.invocations) { - _mesa_glsl_error(loc, state, - "geometry shader set conflicting invocations " - "(%d and %d)", this->invocations, q.invocations); - return false; + if (this->invocations) { + this->invocations->merge_qualifier(q.invocations); + } else { + this->invocations = q.invocations; } - this->invocations = q.invocations; } if (state->stage == MESA_SHADER_GEOMETRY && state->has_explicit_attrib_stream()) { - if (q.flags.q.stream && q.stream >= state->ctx->Const.MaxVertexStreams) { - _mesa_glsl_error(loc, state, - "`stream' value is larger than MAX_VERTEX_STREAMS - 1 " - "(%d > %d)", - q.stream, state->ctx->Const.MaxVertexStreams - 1); - } - if (this->flags.q.explicit_stream && - this->stream >= state->ctx->Const.MaxVertexStreams) { - _mesa_glsl_error(loc, state, - "`stream' value is larger than MAX_VERTEX_STREAMS - 1 " - "(%d > %d)", - this->stream, state->ctx->Const.MaxVertexStreams - 1); - } - if (!this->flags.q.explicit_stream) { if (q.flags.q.stream) { this->flags.q.stream = 1; @@ -222,14 +216,11 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, } if (q.flags.q.vertices) { - if (this->flags.q.vertices && this->vertices != q.vertices) { - _mesa_glsl_error(loc, state, - "tessellation control shader set conflicting " - "vertices (%d and %d)", - this->vertices, q.vertices); - return false; + if (this->vertices) { + this->vertices->merge_qualifier(q.vertices); + } else { + this->vertices = q.vertices; } - this->vertices = q.vertices; } if (q.flags.q.vertex_spacing) { @@ -266,15 +257,11 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc, for (int i = 0; i < 3; i++) { if (q.flags.q.local_size & (1 << i)) { - if ((this->flags.q.local_size & (1 << i)) && - this->local_size[i] != q.local_size[i]) { - _mesa_glsl_error(loc, state, - "compute shader set conflicting values for " - "local_size_%c (%d and %d)", 'x' + i, - this->local_size[i], q.local_size[i]); - return false; + if (this->local_size[i]) { + this->local_size[i]->merge_qualifier(q.local_size[i]); + } else { + this->local_size[i] = q.local_size[i]; } - this->local_size[i] = q.local_size[i]; } } @@ -313,7 +300,7 @@ ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc, const bool r = this->merge_qualifier(loc, state, q); if (state->stage == MESA_SHADER_TESS_CTRL) { - node = new(mem_ctx) ast_tcs_output_layout(*loc, q.vertices); + node = new(mem_ctx) ast_tcs_output_layout(*loc); } return r; @@ -417,15 +404,13 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE 
*loc, state->in_qualifier->prim_type = q.prim_type; } - if (this->flags.q.invocations && - q.flags.q.invocations && - this->invocations != q.invocations) { - _mesa_glsl_error(loc, state, - "conflicting invocations counts specified"); - return false; - } else if (q.flags.q.invocations) { + if (q.flags.q.invocations) { this->flags.q.invocations = 1; - this->invocations = q.invocations; + if (this->invocations) { + this->invocations->merge_qualifier(q.invocations); + } else { + this->invocations = q.invocations; + } } if (q.flags.q.early_fragment_tests) { @@ -468,15 +453,67 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc, if (create_gs_ast) { node = new(mem_ctx) ast_gs_input_layout(*loc, q.prim_type); } else if (create_cs_ast) { - /* Infer a local_size of 1 for every unspecified dimension */ - unsigned local_size[3]; - for (int i = 0; i < 3; i++) { - if (q.flags.q.local_size & (1 << i)) - local_size[i] = q.local_size[i]; - else - local_size[i] = 1; + node = new(mem_ctx) ast_cs_input_layout(*loc, q.local_size); + } + + return true; +} + +bool +ast_layout_expression::process_qualifier_constant(struct _mesa_glsl_parse_state *state, + const char *qual_indentifier, + unsigned *value, + bool can_be_zero) +{ + int min_value = 0; + bool first_pass = true; + *value = 0; + + if (!can_be_zero) + min_value = 1; + + for (exec_node *node = layout_const_expressions.head; + !node->is_tail_sentinel(); node = node->next) { + + exec_list dummy_instructions; + ast_node *const_expression = exec_node_data(ast_node, node, link); + + ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state); + + ir_constant *const const_int = ir->constant_expression_value(); + if (const_int == NULL || !const_int->type->is_integer()) { + YYLTYPE loc = const_expression->get_location(); + _mesa_glsl_error(&loc, state, "%s must be an integral constant " + "expression", qual_indentifier); + return false; + } + + if (const_int->value.i[0] < min_value) { + YYLTYPE loc = const_expression->get_location(); + _mesa_glsl_error(&loc, state, "%s layout qualifier is invalid " + "(%d < %d)", qual_indentifier, + const_int->value.i[0], min_value); + return false; } - node = new(mem_ctx) ast_cs_input_layout(*loc, local_size); + + if (!first_pass && *value != const_int->value.u[0]) { + YYLTYPE loc = const_expression->get_location(); + _mesa_glsl_error(&loc, state, "%s layout qualifier does not " + "match previous declaration (%d vs %d)", + qual_indentifier, *value, const_int->value.i[0]); + return false; + } else { + first_pass = false; + *value = const_int->value.u[0]; + } + + /* If the location is const (and we've verified that + * it is) then no instructions should have been emitted + * when we converted it to HIR. If they were emitted, + * then either the location isn't const after all, or + * we are emitting unnecessary instructions. 
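+ *
+ * As an illustrative sketch (not part of the patch): after
+ * merge_qualifier() has combined declarations such as
+ *
+ *    layout(max_vertices = 16) out;
+ *    layout(triangle_strip, max_vertices = 16) out;
+ *
+ * every max_vertices expression sits in one layout_const_expressions
+ * list, and the loop above folds each entry and checks that it
+ * matches the others; a third declaration with a different constant
+ * would trigger the mismatch error above.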
+ */ + assert(dummy_instructions.is_empty()); } return true; diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp index 13494446b59..881ee2b6b55 100644 --- a/src/glsl/builtin_functions.cpp +++ b/src/glsl/builtin_functions.cpp @@ -290,6 +290,20 @@ texture_multisample_array(const _mesa_glsl_parse_state *state) } static bool +texture_samples_identical(const _mesa_glsl_parse_state *state) +{ + return texture_multisample(state) && + state->EXT_shader_samples_identical_enable; +} + +static bool +texture_samples_identical_array(const _mesa_glsl_parse_state *state) +{ + return texture_multisample_array(state) && + state->EXT_shader_samples_identical_enable; +} + +static bool fs_texture_cube_map_array(const _mesa_glsl_parse_state *state) { return state->stage == MESA_SHADER_FRAGMENT && @@ -724,6 +738,7 @@ private: BA2(textureQueryLod); B1(textureQueryLevels); + BA2(textureSamplesIdentical); B1(dFdx); B1(dFdy); B1(fwidth); @@ -2210,6 +2225,16 @@ builtin_builder::create_builtins() NULL); + add_function("textureSamplesIdenticalEXT", + _textureSamplesIdentical(texture_samples_identical, glsl_type::sampler2DMS_type, glsl_type::ivec2_type), + _textureSamplesIdentical(texture_samples_identical, glsl_type::isampler2DMS_type, glsl_type::ivec2_type), + _textureSamplesIdentical(texture_samples_identical, glsl_type::usampler2DMS_type, glsl_type::ivec2_type), + + _textureSamplesIdentical(texture_samples_identical_array, glsl_type::sampler2DMSArray_type, glsl_type::ivec3_type), + _textureSamplesIdentical(texture_samples_identical_array, glsl_type::isampler2DMSArray_type, glsl_type::ivec3_type), + _textureSamplesIdentical(texture_samples_identical_array, glsl_type::usampler2DMSArray_type, glsl_type::ivec3_type), + NULL); + add_function("texture1D", _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), @@ -3573,7 +3598,16 @@ builtin_builder::_isinf(builtin_available_predicate avail, const glsl_type *type ir_constant_data infinities; for (int i = 0; i < type->vector_elements; i++) { - infinities.f[i] = INFINITY; + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + infinities.f[i] = INFINITY; + break; + case GLSL_TYPE_DOUBLE: + infinities.d[i] = INFINITY; + break; + default: + unreachable("unknown type"); + } } body.emit(ret(equal(abs(x), imm(type, infinities)))); @@ -4675,6 +4709,25 @@ builtin_builder::_textureQueryLevels(const glsl_type *sampler_type) return sig; } +ir_function_signature * +builtin_builder::_textureSamplesIdentical(builtin_available_predicate avail, + const glsl_type *sampler_type, + const glsl_type *coord_type) +{ + ir_variable *s = in_var(sampler_type, "sampler"); + ir_variable *P = in_var(coord_type, "P"); + const glsl_type *return_type = glsl_type::bool_type; + MAKE_SIG(return_type, avail, 2, s, P); + + ir_texture *tex = new(mem_ctx) ir_texture(ir_samples_identical); + tex->coordinate = var_ref(P); + tex->set_sampler(var_ref(s), return_type); + + body.emit(ret(tex)); + + return sig; +} + UNOP(dFdx, ir_unop_dFdx, fs_oes_derivatives) UNOP(dFdxCoarse, ir_unop_dFdx_coarse, fs_derivative_control) UNOP(dFdxFine, ir_unop_dFdx_fine, fs_derivative_control) @@ -5243,8 +5296,8 @@ builtin_builder::_image_size_prototype(const glsl_type *image_type, ir_function_signature * builtin_builder::_image_samples_prototype(const glsl_type *image_type, - unsigned num_arguments, - unsigned flags) + unsigned /* num_arguments */, + unsigned /* flags 
*/) { ir_variable *image = in_var(image_type, "image"); ir_function_signature *sig = diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp index b06c1bc5c12..e8eab808a19 100644 --- a/src/glsl/builtin_variables.cpp +++ b/src/glsl/builtin_variables.cpp @@ -327,6 +327,7 @@ per_vertex_accumulator::add_field(int slot, const glsl_type *type, this->fields[this->num_fields].centroid = 0; this->fields[this->num_fields].sample = 0; this->fields[this->num_fields].patch = 0; + this->fields[this->num_fields].precision = GLSL_PRECISION_NONE; this->num_fields++; } @@ -376,6 +377,11 @@ private: return add_variable(name, type, ir_var_shader_out, slot); } + ir_variable *add_index_output(int slot, int index, const glsl_type *type, const char *name) + { + return add_index_variable(name, type, ir_var_shader_out, slot, index); + } + ir_variable *add_system_value(int slot, const glsl_type *type, const char *name) { @@ -384,6 +390,8 @@ private: ir_variable *add_variable(const char *name, const glsl_type *type, enum ir_variable_mode mode, int slot); + ir_variable *add_index_variable(const char *name, const glsl_type *type, + enum ir_variable_mode mode, int slot, int index); ir_variable *add_uniform(const glsl_type *type, const char *name); ir_variable *add_const(const char *name, int value); ir_variable *add_const_ivec3(const char *name, int x, int y, int z); @@ -429,6 +437,46 @@ builtin_variable_generator::builtin_variable_generator( { } +ir_variable * +builtin_variable_generator::add_index_variable(const char *name, + const glsl_type *type, + enum ir_variable_mode mode, int slot, int index) +{ + ir_variable *var = new(symtab) ir_variable(type, name, mode); + var->data.how_declared = ir_var_declared_implicitly; + + switch (var->data.mode) { + case ir_var_auto: + case ir_var_shader_in: + case ir_var_uniform: + case ir_var_system_value: + var->data.read_only = true; + break; + case ir_var_shader_out: + case ir_var_shader_storage: + break; + default: + /* The only variables that are added using this function should be + * uniforms, shader storage, shader inputs, shader outputs, constants + * (which use ir_var_auto), and system values. + */ + assert(0); + break; + } + + var->data.location = slot; + var->data.explicit_location = (slot >= 0); + var->data.explicit_index = 1; + var->data.index = index; + + /* Once the variable is created and initialized, add it to the symbol table + * and add the declaration to the IR stream.
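+ *
+ * Net effect, sketched for the dual-source built-in added later in
+ * this patch (the real call goes through the add_index_output()
+ * wrapper):
+ *
+ *    add_index_variable("gl_SecondaryFragColorEXT", vec4_t,
+ *                       ir_var_shader_out, FRAG_RESULT_DATA0, 1);
+ *
+ * yields var->data.location == FRAG_RESULT_DATA0 and
+ * var->data.index == 1, i.e. color attachment 0, blend source one.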
+ */ + instructions->push_tail(var); + + symtab->add_variable(var); + return var; +} ir_variable * builtin_variable_generator::add_variable(const char *name, @@ -580,6 +628,14 @@ builtin_variable_generator::generate_constants() add_const("gl_MaxVaryingVectors", state->ctx->Const.MaxVarying); } + + /* EXT_blend_func_extended brings a built-in constant + * for determining the number of dual-source draw buffers + */ + if (state->EXT_blend_func_extended_enable) { + add_const("gl_MaxDualSourceDrawBuffersEXT", + state->Const.MaxDualSourceDrawBuffers); + } } else { add_const("gl_MaxVertexUniformComponents", state->Const.MaxVertexUniformComponents); @@ -1016,6 +1072,19 @@ builtin_variable_generator::generate_fs_special_vars() array(vec4_t, state->Const.MaxDrawBuffers), "gl_FragData"); } + if (state->es_shader && state->language_version == 100 && state->EXT_blend_func_extended_enable) { + /* We make an assumption here that there will only ever be one dual-source draw buffer. + * In case this assumption is ever proven to be false, make sure to assert here + * since we don't handle this case. + * In practice, this issue will never arise since no hardware will support it. + */ + assert(state->Const.MaxDualSourceDrawBuffers <= 1); + add_index_output(FRAG_RESULT_DATA0, 1, vec4_t, "gl_SecondaryFragColorEXT"); + add_index_output(FRAG_RESULT_DATA0, 1, + array(vec4_t, state->Const.MaxDualSourceDrawBuffers), + "gl_SecondaryFragDataEXT"); + } + /* gl_FragDepth has always been in desktop GLSL, but did not appear in GLSL * ES 1.00. */ @@ -1186,6 +1255,7 @@ builtin_variable_generator::generate_varyings() var->data.centroid = fields[i].centroid; var->data.sample = fields[i].sample; var->data.patch = fields[i].patch; + var->data.precision = fields[i].precision; var->init_interface_type(per_vertex_out_type); } } diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y index 6aa7abec00e..2fd4cf04079 100644 --- a/src/glsl/glcpp/glcpp-parse.y +++ b/src/glsl/glcpp/glcpp-parse.y @@ -2384,6 +2384,8 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio add_builtin_define(parser, "GL_OES_standard_derivatives", 1); if (extensions->ARB_texture_multisample) add_builtin_define(parser, "GL_OES_texture_storage_multisample_2d_array", 1); + if (extensions->ARB_blend_func_extended) + add_builtin_define(parser, "GL_EXT_blend_func_extended", 1); } } else { add_builtin_define(parser, "GL_ARB_draw_buffers", 1); @@ -2510,6 +2512,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio if (extensions != NULL) { if (extensions->EXT_shader_integer_mix) add_builtin_define(parser, "GL_EXT_shader_integer_mix", 1); + + if (extensions->EXT_shader_samples_identical) + add_builtin_define(parser, "GL_EXT_shader_samples_identical", 1); } if (version >= 150) diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index adf6a05acce..5a8f98019d1 100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -298,7 +298,6 @@ static bool match_layout_qualifier(const char *s1, const char *s2, %type <node> conditionopt %type <node> for_init_statement %type <for_rest_statement> for_rest_statement -%type <n> integer_constant %type <node> layout_defaults %right THEN ELSE @@ -1152,11 +1151,6 @@ layout_qualifier_id_list: } ; -integer_constant: - INTCONSTANT { $$ = $1; } - | UINTCONSTANT { $$ = $1; } - ; - layout_qualifier_id: any_identifier { @@ -1453,9 +1447,18 @@ layout_qualifier_id: YYERROR; } } - | any_identifier '=' integer_constant + | any_identifier '='
constant_expression { memset(& $$, 0, sizeof($$)); + void *ctx = state; + + if ($3->oper != ast_int_constant && + $3->oper != ast_uint_constant && + !state->has_enhanced_layouts()) { + _mesa_glsl_error(& @1, state, + "compile-time constant expressions require " + "GLSL 4.40 or ARB_enhanced_layouts"); + } if (match_layout_qualifier("location", $1, state) == 0) { $$.flags.q.explicit_location = 1; @@ -1466,24 +1469,17 @@ layout_qualifier_id: "GL_ARB_explicit_attrib_location layout " "identifier `%s' used", $1); } - - if ($3 >= 0) { - $$.location = $3; - } else { - _mesa_glsl_error(& @3, state, "invalid location %d specified", $3); - YYERROR; - } + $$.location = $3; } if (match_layout_qualifier("index", $1, state) == 0) { - $$.flags.q.explicit_index = 1; - - if ($3 >= 0) { - $$.index = $3; - } else { - _mesa_glsl_error(& @3, state, "invalid index %d specified", $3); + if (state->es_shader && !state->EXT_blend_func_extended_enable) { + _mesa_glsl_error(& @3, state, "index layout qualifier requires EXT_blend_func_extended"); YYERROR; } + + $$.flags.q.explicit_index = 1; + $$.index = $3; } if ((state->has_420pack() || @@ -1502,18 +1498,11 @@ layout_qualifier_id: if (match_layout_qualifier("max_vertices", $1, state) == 0) { $$.flags.q.max_vertices = 1; - - if ($3 < 0) { + $$.max_vertices = new(ctx) ast_layout_expression(@1, $3); + if (!state->is_version(150, 0)) { _mesa_glsl_error(& @3, state, - "invalid max_vertices %d specified", $3); - YYERROR; - } else { - $$.max_vertices = $3; - if (!state->is_version(150, 0)) { - _mesa_glsl_error(& @3, state, - "#version 150 max_vertices qualifier " - "specified", $3); - } + "#version 150 max_vertices qualifier " + "specified", $3); } } @@ -1521,15 +1510,8 @@ layout_qualifier_id: if (match_layout_qualifier("stream", $1, state) == 0 && state->check_explicit_attrib_stream_allowed(& @3)) { $$.flags.q.stream = 1; - - if ($3 < 0) { - _mesa_glsl_error(& @3, state, - "invalid stream %d specified", $3); - YYERROR; - } else { - $$.flags.q.explicit_stream = 1; - $$.stream = $3; - } + $$.flags.q.explicit_stream = 1; + $$.stream = $3; } } @@ -1541,12 +1523,7 @@ layout_qualifier_id: for (int i = 0; i < 3; i++) { if (match_layout_qualifier(local_size_qualifiers[i], $1, state) == 0) { - if ($3 <= 0) { - _mesa_glsl_error(& @3, state, - "invalid %s of %d specified", - local_size_qualifiers[i], $3); - YYERROR; - } else if (!state->has_compute_shader()) { + if (!state->has_compute_shader()) { _mesa_glsl_error(& @3, state, "%s qualifier requires GLSL 4.30 or " "GLSL ES 3.10 or ARB_compute_shader", @@ -1554,7 +1531,7 @@ layout_qualifier_id: YYERROR; } else { $$.flags.q.local_size |= (1 << i); - $$.local_size[i] = $3; + $$.local_size[i] = new(ctx) ast_layout_expression(@1, $3); } break; } @@ -1562,48 +1539,24 @@ layout_qualifier_id: if (match_layout_qualifier("invocations", $1, state) == 0) { $$.flags.q.invocations = 1; - - if ($3 <= 0) { + $$.invocations = new(ctx) ast_layout_expression(@1, $3); + if (!state->is_version(400, 0) && + !state->ARB_gpu_shader5_enable) { _mesa_glsl_error(& @3, state, - "invalid invocations %d specified", $3); - YYERROR; - } else if ($3 > MAX_GEOMETRY_SHADER_INVOCATIONS) { - _mesa_glsl_error(& @3, state, - "invocations (%d) exceeds " - "GL_MAX_GEOMETRY_SHADER_INVOCATIONS", $3); - YYERROR; - } else { - $$.invocations = $3; - if (!state->is_version(400, 0) && - !state->ARB_gpu_shader5_enable) { - _mesa_glsl_error(& @3, state, - "GL_ARB_gpu_shader5 invocations " - "qualifier specified", $3); - } + "GL_ARB_gpu_shader5 invocations " + "qualifier 
specified", $3); } } /* Layout qualifiers for tessellation control shaders. */ if (match_layout_qualifier("vertices", $1, state) == 0) { $$.flags.q.vertices = 1; - - if ($3 <= 0) { - _mesa_glsl_error(& @3, state, - "invalid vertices (%d) specified", $3); - YYERROR; - } else if ($3 > (int)state->Const.MaxPatchVertices) { - _mesa_glsl_error(& @3, state, - "vertices (%d) exceeds " - "GL_MAX_PATCH_VERTICES", $3); - YYERROR; - } else { - $$.vertices = $3; - if (!state->ARB_tessellation_shader_enable && - !state->is_version(400, 0)) { - _mesa_glsl_error(& @1, state, - "vertices qualifier requires GLSL 4.00 or " - "ARB_tessellation_shader"); - } + $$.vertices = new(ctx) ast_layout_expression(@1, $3); + if (!state->ARB_tessellation_shader_enable && + !state->is_version(400, 0)) { + _mesa_glsl_error(& @1, state, + "vertices qualifier requires GLSL 4.00 or " + "ARB_tessellation_shader"); } } diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 02584c62a4d..b41b64af2c1 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -104,6 +104,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->Const.MaxDrawBuffers = ctx->Const.MaxDrawBuffers; + this->Const.MaxDualSourceDrawBuffers = ctx->Const.MaxDualSourceDrawBuffers; + /* 1.50 constants */ this->Const.MaxVertexOutputComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; this->Const.MaxGeometryInputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents; @@ -646,9 +648,11 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(AMD_shader_trinary_minmax, true, false, dummy_true), EXT(AMD_vertex_shader_layer, true, false, AMD_vertex_shader_layer), EXT(AMD_vertex_shader_viewport_index, true, false, AMD_vertex_shader_viewport_index), + EXT(EXT_blend_func_extended, false, true, ARB_blend_func_extended), EXT(EXT_draw_buffers, false, true, dummy_true), EXT(EXT_separate_shader_objects, false, true, dummy_true), EXT(EXT_shader_integer_mix, true, true, EXT_shader_integer_mix), + EXT(EXT_shader_samples_identical, true, true, EXT_shader_samples_identical), EXT(EXT_texture_array, true, false, EXT_texture_array), }; @@ -1646,8 +1650,20 @@ set_shader_inout_layout(struct gl_shader *shader, switch (shader->Stage) { case MESA_SHADER_TESS_CTRL: shader->TessCtrl.VerticesOut = 0; - if (state->tcs_output_vertices_specified) - shader->TessCtrl.VerticesOut = state->out_qualifier->vertices; + if (state->tcs_output_vertices_specified) { + unsigned vertices; + if (state->out_qualifier->vertices-> + process_qualifier_constant(state, "vertices", &vertices, + false)) { + + YYLTYPE loc = state->out_qualifier->vertices->get_location(); + if (vertices > state->Const.MaxPatchVertices) { + _mesa_glsl_error(&loc, state, "vertices (%d) exceeds " + "GL_MAX_PATCH_VERTICES", vertices); + } + shader->TessCtrl.VerticesOut = vertices; + } + } break; case MESA_SHADER_TESS_EVAL: shader->TessEval.PrimitiveMode = PRIM_UNKNOWN; @@ -1668,8 +1684,14 @@ set_shader_inout_layout(struct gl_shader *shader, break; case MESA_SHADER_GEOMETRY: shader->Geom.VerticesOut = 0; - if (state->out_qualifier->flags.q.max_vertices) - shader->Geom.VerticesOut = state->out_qualifier->max_vertices; + if (state->out_qualifier->flags.q.max_vertices) { + unsigned qual_max_vertices; + if (state->out_qualifier->max_vertices-> + process_qualifier_constant(state, "max_vertices", + &qual_max_vertices, true)) { + shader->Geom.VerticesOut = qual_max_vertices; + } + } if 
(state->gs_input_prim_type_specified) { shader->Geom.InputType = state->in_qualifier->prim_type; @@ -1684,8 +1706,22 @@ set_shader_inout_layout(struct gl_shader *shader, } shader->Geom.Invocations = 0; - if (state->in_qualifier->flags.q.invocations) - shader->Geom.Invocations = state->in_qualifier->invocations; + if (state->in_qualifier->flags.q.invocations) { + unsigned invocations; + if (state->in_qualifier->invocations-> + process_qualifier_constant(state, "invocations", + &invocations, false)) { + + YYLTYPE loc = state->in_qualifier->invocations->get_location(); + if (invocations > MAX_GEOMETRY_SHADER_INVOCATIONS) { + _mesa_glsl_error(&loc, state, + "invocations (%d) exceeds " + "GL_MAX_GEOMETRY_SHADER_INVOCATIONS", + invocations); + } + shader->Geom.Invocations = invocations; + } + } break; case MESA_SHADER_COMPUTE: @@ -1797,6 +1833,9 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader, if (shader->InfoLog) ralloc_free(shader->InfoLog); + if (!state->error) + set_shader_inout_layout(shader, state); + shader->symbols = new(shader->ir) glsl_symbol_table; shader->CompileStatus = !state->error; shader->InfoLog = state->info_log; @@ -1804,9 +1843,6 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader, shader->IsES = state->es_shader; shader->uses_builtin_functions = state->uses_builtin_functions; - if (!state->error) - set_shader_inout_layout(shader, state); - /* Retain any live IR, but trash the rest. */ reparent_ir(shader->ir, shader->ir); diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index 1d8c1b8799f..17ff0b5af79 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -380,6 +380,9 @@ struct _mesa_glsl_parse_state { /* ARB_draw_buffers */ unsigned MaxDrawBuffers; + /* ARB_blend_func_extended */ + unsigned MaxDualSourceDrawBuffers; + /* 3.00 ES */ int MinProgramTexelOffset; int MaxProgramTexelOffset; @@ -595,12 +598,16 @@ struct _mesa_glsl_parse_state { bool AMD_vertex_shader_layer_warn; bool AMD_vertex_shader_viewport_index_enable; bool AMD_vertex_shader_viewport_index_warn; + bool EXT_blend_func_extended_enable; + bool EXT_blend_func_extended_warn; bool EXT_draw_buffers_enable; bool EXT_draw_buffers_warn; bool EXT_separate_shader_objects_enable; bool EXT_separate_shader_objects_warn; bool EXT_shader_integer_mix_enable; bool EXT_shader_integer_mix_warn; + bool EXT_shader_samples_identical_enable; + bool EXT_shader_samples_identical_warn; bool EXT_texture_array_enable; bool EXT_texture_array_warn; /*@}*/ diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index 8933b230177..ca520f547a1 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -1421,12 +1421,11 @@ ir_dereference::is_lvalue() const } -static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples" }; +static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples", "samples_identical" }; const char *ir_texture::opcode_string() { - assert((unsigned int) op <= - sizeof(tex_opcode_strs) / sizeof(tex_opcode_strs[0])); + assert((unsigned int) op < ARRAY_SIZE(tex_opcode_strs)); return tex_opcode_strs[op]; } @@ -1456,6 +1455,10 @@ ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type) } else if (this->op == ir_lod) { assert(type->vector_elements == 2); assert(type->base_type == GLSL_TYPE_FLOAT); + } else if (this->op == ir_samples_identical) { + 
assert(type == glsl_type::bool_type); + assert(sampler->type->base_type == GLSL_TYPE_SAMPLER); + assert(sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS); } else { assert(sampler->type->sampler_type == (int) type->base_type); if (sampler->type->sampler_shadow) @@ -1676,6 +1679,7 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name, this->data.interpolation = INTERP_QUALIFIER_NONE; this->data.max_array_access = 0; this->data.atomic.offset = 0; + this->data.precision = GLSL_PRECISION_NONE; this->data.image_read_only = false; this->data.image_write_only = false; this->data.image_coherent = false; @@ -1842,6 +1846,7 @@ ir_function_signature::replace_parameters(exec_list *new_params) ir_function::ir_function(const char *name) : ir_instruction(ir_type_function) { + this->subroutine_index = -1; this->name = ralloc_strdup(this, name); } diff --git a/src/glsl/ir.h b/src/glsl/ir.h index d59dee1e369..e1109eec1d3 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -1171,6 +1171,8 @@ public: */ int num_subroutine_types; const struct glsl_type **subroutine_types; + + int subroutine_index; }; inline const char *ir_function_signature::function_name() const @@ -1965,6 +1967,7 @@ enum ir_texture_opcode { ir_tg4, /**< Texture gather */ ir_query_levels, /**< Texture levels query */ ir_texture_samples, /**< Texture samples query */ + ir_samples_identical, /**< Query whether all samples are definitely identical. */ }; @@ -1991,6 +1994,7 @@ enum ir_texture_opcode { * (lod <type> <sampler> <coordinate>) * (tg4 <type> <sampler> <coordinate> <offset> <component>) * (query_levels <type> <sampler>) + * (samples_identical <sampler> <coordinate>) */ class ir_texture : public ir_rvalue { public: diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp index d6b06eeec87..2aef4fcb4ac 100644 --- a/src/glsl/ir_clone.cpp +++ b/src/glsl/ir_clone.cpp @@ -223,6 +223,7 @@ ir_texture::clone(void *mem_ctx, struct hash_table *ht) const case ir_lod: case ir_query_levels: case ir_texture_samples: + case ir_samples_identical: break; case ir_txb: new_tex->lod_info.bias = this->lod_info.bias->clone(mem_ctx, ht); @@ -269,6 +270,7 @@ ir_function::clone(void *mem_ctx, struct hash_table *ht) const ir_function *copy = new(mem_ctx) ir_function(this->name); copy->is_subroutine = this->is_subroutine; + copy->subroutine_index = this->subroutine_index; copy->num_subroutine_types = this->num_subroutine_types; copy->subroutine_types = ralloc_array(mem_ctx, const struct glsl_type *, copy->num_subroutine_types); for (int i = 0; i < copy->num_subroutine_types; i++) diff --git a/src/glsl/ir_equals.cpp b/src/glsl/ir_equals.cpp index 5f0785e0ece..b86f4ea16bb 100644 --- a/src/glsl/ir_equals.cpp +++ b/src/glsl/ir_equals.cpp @@ -58,8 +58,13 @@ ir_constant::equals(const ir_instruction *ir, enum ir_node_type) const return false; for (unsigned i = 0; i < type->components(); i++) { - if (value.u[i] != other->value.u[i]) - return false; + if (type->base_type == GLSL_TYPE_DOUBLE) { + if (value.d[i] != other->value.d[i]) + return false; + } else { + if (value.u[i] != other->value.u[i]) + return false; + } } return true; @@ -152,6 +157,7 @@ ir_texture::equals(const ir_instruction *ir, enum ir_node_type ignore) const case ir_lod: case ir_query_levels: case ir_texture_samples: + case ir_samples_identical: break; case ir_txb: if (!lod_info.bias->equals(other->lod_info.bias, ignore)) diff --git a/src/glsl/ir_hv_accept.cpp b/src/glsl/ir_hv_accept.cpp index 6495cc4581d..213992af28c 100644 --- a/src/glsl/ir_hv_accept.cpp +++ 
b/src/glsl/ir_hv_accept.cpp @@ -195,6 +195,7 @@ ir_texture::accept(ir_hierarchical_visitor *v) case ir_lod: case ir_query_levels: case ir_texture_samples: + case ir_samples_identical: break; case ir_txb: s = this->lod_info.bias->accept(v); diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp index 42b03fdea52..fd7bc2eea98 100644 --- a/src/glsl/ir_print_visitor.cpp +++ b/src/glsl/ir_print_visitor.cpp @@ -268,6 +268,14 @@ void ir_print_visitor::visit(ir_texture *ir) { fprintf(f, "(%s ", ir->opcode_string()); + if (ir->op == ir_samples_identical) { + ir->sampler->accept(this); + fprintf(f, " "); + ir->coordinate->accept(this); + fprintf(f, ")"); + return; + } + print_type(f, ir->type); fprintf(f, " "); @@ -334,6 +342,8 @@ void ir_print_visitor::visit(ir_texture *ir) case ir_tg4: ir->lod_info.component->accept(this); break; + case ir_samples_identical: + unreachable("ir_samples_identical was already handled"); }; fprintf(f, ")"); } diff --git a/src/glsl/ir_rvalue_visitor.cpp b/src/glsl/ir_rvalue_visitor.cpp index a6966f546bc..6486838b8b8 100644 --- a/src/glsl/ir_rvalue_visitor.cpp +++ b/src/glsl/ir_rvalue_visitor.cpp @@ -59,6 +59,7 @@ ir_rvalue_base_visitor::rvalue_visit(ir_texture *ir) case ir_lod: case ir_query_levels: case ir_texture_samples: + case ir_samples_identical: break; case ir_txb: handle_rvalue(&ir->lod_info.bias); diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp index 7e77a675db1..c0b4b3e820c 100644 --- a/src/glsl/link_varyings.cpp +++ b/src/glsl/link_varyings.cpp @@ -766,7 +766,7 @@ public: gl_shader_stage consumer_stage); ~varying_matches(); void record(ir_variable *producer_var, ir_variable *consumer_var); - unsigned assign_locations(); + unsigned assign_locations(uint64_t reserved_slots); void store_locations() const; private: @@ -986,7 +986,7 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) * passed to varying_matches::record(). */ unsigned -varying_matches::assign_locations() +varying_matches::assign_locations(uint64_t reserved_slots) { /* Sort varying matches into an order that makes them easy to pack. */ qsort(this->matches, this->num_matches, sizeof(*this->matches), @@ -1013,6 +1013,10 @@ varying_matches::assign_locations() != this->matches[i].packing_class) { *location = ALIGN(*location, 4); } + while ((*location < MAX_VARYING * 4u) && + (reserved_slots & (1ull << *location / 4u))) { + *location = ALIGN(*location + 1, 4); + } this->matches[i].generic_location = *location; @@ -1376,6 +1380,38 @@ canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode) } /** + * Generate a bitfield map of the explicit locations for shader varyings. + * + * In theory a 32-bit value would be enough, but a 64-bit value is future-proof.
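+ *
+ * For example (illustrative): a producer that declares
+ *
+ *    layout(location = 3) out vec4 a;
+ *
+ * makes this function return 1ull << 3, and assign_locations() then
+ * steps over components 12..15 when packing the generic varyings.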
+ */ +uint64_t +reserved_varying_slot(struct gl_shader *stage, ir_variable_mode io_mode) +{ + assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out); + assert(MAX_VARYING <= 64); /* avoid an overflow of the returned value */ + + uint64_t slots = 0; + int var_slot; + + if (!stage) + return slots; + + foreach_in_list(ir_instruction, node, stage->ir) { + ir_variable *const var = node->as_variable(); + + if (var == NULL || var->data.mode != io_mode || !var->data.explicit_location) + continue; + + var_slot = var->data.location - VARYING_SLOT_VAR0; + if (var_slot >= 0 && var_slot < MAX_VARYING) + slots |= 1ull << var_slot; + } + + return slots; +} + + /** * Assign locations for all variables that are produced in one pipeline stage * (the "producer") and consumed in the next stage (the "consumer"). * @@ -1550,7 +1586,11 @@ assign_varying_locations(struct gl_context *ctx, matches.record(matched_candidate->toplevel_var, NULL); } - const unsigned slots_used = matches.assign_locations(); + const uint64_t reserved_slots = + reserved_varying_slot(producer, ir_var_shader_out) | + reserved_varying_slot(consumer, ir_var_shader_in); + + const unsigned slots_used = matches.assign_locations(reserved_slots); matches.store_locations(); for (unsigned i = 0; i < num_tfeedback_decls; ++i) { diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index db00f8febc6..331d9a28007 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -3864,10 +3864,43 @@ link_assign_subroutine_types(struct gl_shader_program *prog) sh->SubroutineFunctions[sh->NumSubroutineFunctions].types = ralloc_array(sh, const struct glsl_type *, fn->num_subroutine_types); + + /* From Section 4.4.4 (Subroutine Function Layout Qualifiers) of the + * GLSL 4.5 spec: + * + * "Each subroutine with an index qualifier in the shader must be + * given a unique index, otherwise a compile or link error will be + * generated."
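+ *
+ * Illustrative GLSL (fn_t standing in for some subroutine type):
+ * these two definitions must be rejected,
+ *
+ *    layout(index = 2) subroutine(fn_t) vec4 impl_a(void) { ... }
+ *    layout(index = 2) subroutine(fn_t) vec4 impl_b(void) { ... }
+ *
+ * while definitions without an index qualifier later receive the
+ * lowest indices not explicitly claimed (see the loop below).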
+ */ + for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) { + if (sh->SubroutineFunctions[j].index != -1 && + sh->SubroutineFunctions[j].index == fn->subroutine_index) { + linker_error(prog, "each subroutine index qualifier in the " + "shader must be unique\n"); + return; + } + } + sh->SubroutineFunctions[sh->NumSubroutineFunctions].index = + fn->subroutine_index; + for (int j = 0; j < fn->num_subroutine_types; j++) sh->SubroutineFunctions[sh->NumSubroutineFunctions].types[j] = fn->subroutine_types[j]; sh->NumSubroutineFunctions++; } + + /* Assign index for subroutines without an explicit index*/ + int index = 0; + for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) { + while (sh->SubroutineFunctions[j].index == -1) { + for (unsigned k = 0; k < sh->NumSubroutineFunctions; k++) { + if (sh->SubroutineFunctions[k].index == index) + break; + else if (k == sh->NumSubroutineFunctions - 1) + sh->SubroutineFunctions[j].index = index; + } + index++; + } + } } } diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index d8df3544f10..a26300d1d26 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -31,6 +31,7 @@ #include "ir_visitor.h" #include "ir_hierarchical_visitor.h" #include "ir.h" +#include "main/imports.h" /* * pass to lower GLSL IR to NIR @@ -147,16 +148,10 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, nir_lower_outputs_to_temporaries(shader); - /* TODO: Use _mesa_fls instead */ - unsigned num_textures = 0; - for (unsigned i = 0; i < 8 * sizeof(sh->Program->SamplersUsed); i++) - if (sh->Program->SamplersUsed & (1 << i)) - num_textures = i; - shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name); if (shader_prog->Label) shader->info.label = ralloc_strdup(shader, shader_prog->Label); - shader->info.num_textures = num_textures; + shader->info.num_textures = _mesa_fls(sh->Program->SamplersUsed); shader->info.num_ubos = sh->NumUniformBlocks; shader->info.num_abos = shader_prog->NumAtomicBuffers; shader->info.num_ssbos = sh->NumShaderStorageBlocks; @@ -174,6 +169,10 @@ glsl_to_nir(const struct gl_shader_program *shader_prog, shader_prog->TransformFeedback.NumVarying > 0; switch (stage) { + case MESA_SHADER_TESS_CTRL: + shader->info.tcs.vertices_out = shader_prog->TessCtrl.VerticesOut; + break; + case MESA_SHADER_GEOMETRY: shader->info.gs.vertices_in = shader_prog->Geom.VerticesIn; shader->info.gs.output_primitive = sh->Geom.OutputType; @@ -244,6 +243,8 @@ constant_copy(ir_constant *ir, void *mem_ctx) unsigned total_elems = ir->type->components(); unsigned i; + + ret->num_elements = 0; switch (ir->type->base_type) { case GLSL_TYPE_UINT: for (i = 0; i < total_elems; i++) @@ -268,6 +269,8 @@ constant_copy(ir_constant *ir, void *mem_ctx) case GLSL_TYPE_STRUCT: ret->elements = ralloc_array(mem_ctx, nir_constant *, ir->type->length); + ret->num_elements = ir->type->length; + i = 0; foreach_in_list(ir_constant, field, &ir->components) { ret->elements[i] = constant_copy(field, mem_ctx); @@ -278,6 +281,7 @@ constant_copy(ir_constant *ir, void *mem_ctx) case GLSL_TYPE_ARRAY: ret->elements = ralloc_array(mem_ctx, nir_constant *, ir->type->length); + ret->num_elements = ir->type->length; for (i = 0; i < ir->type->length; i++) ret->elements[i] = constant_copy(ir->array_elements[i], mem_ctx); @@ -297,15 +301,6 @@ nir_visitor::visit(ir_variable *ir) var->type = ir->type; var->name = ralloc_strdup(var, ir->name); - if (ir->is_interface_instance() && ir->get_max_ifc_array_access() != NULL) { - unsigned size = 
ir->get_interface_type()->length; - var->max_ifc_array_access = ralloc_array(var, unsigned, size); - memcpy(var->max_ifc_array_access, ir->get_max_ifc_array_access(), - size * sizeof(unsigned)); - } else { - var->max_ifc_array_access = NULL; - } - var->data.read_only = ir->data.read_only; var->data.centroid = ir->data.centroid; var->data.sample = ir->data.sample; @@ -1543,9 +1538,9 @@ nir_visitor::visit(ir_expression *ir) result = supports_ints ? nir_ior(&b, srcs[0], srcs[1]) : nir_for(&b, srcs[0], srcs[1]); break; - case ir_binop_logic_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break; - result = supports_ints ? nir_ior(&b, srcs[0], srcs[1]) - : nir_for(&b, srcs[0], srcs[1]); + case ir_binop_logic_xor: + result = supports_ints ? nir_ixor(&b, srcs[0], srcs[1]) + : nir_fxor(&b, srcs[0], srcs[1]); break; case ir_binop_lshift: result = nir_ishl(&b, srcs[0], srcs[1]); break; case ir_binop_rshift: @@ -1808,6 +1803,11 @@ nir_visitor::visit(ir_texture *ir) num_srcs = 0; break; + case ir_samples_identical: + op = nir_texop_samples_identical; + num_srcs = 1; /* coordinate */ + break; + default: unreachable("not reached"); } @@ -1835,8 +1835,9 @@ nir_visitor::visit(ir_texture *ir) case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; + case GLSL_TYPE_BOOL: case GLSL_TYPE_UINT: - instr->dest_type = nir_type_unsigned; + instr->dest_type = nir_type_uint; break; default: unreachable("not reached"); diff --git a/src/glsl/nir/glsl_types.cpp b/src/glsl/nir/glsl_types.cpp index 3e9d38f7707..64b5c0cb106 100644 --- a/src/glsl/nir/glsl_types.cpp +++ b/src/glsl/nir/glsl_types.cpp @@ -130,6 +130,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, this->fields.structure[i].image_coherent = fields[i].image_coherent; this->fields.structure[i].image_volatile = fields[i].image_volatile; this->fields.structure[i].image_restrict = fields[i].image_restrict; + this->fields.structure[i].precision = fields[i].precision; } mtx_unlock(&glsl_type::mutex); diff --git a/src/glsl/nir/glsl_types.h b/src/glsl/nir/glsl_types.h index 14c2aa49f85..1aafa5cd547 100644 --- a/src/glsl/nir/glsl_types.h +++ b/src/glsl/nir/glsl_types.h @@ -858,7 +858,7 @@ struct glsl_struct_field { /** * Precision qualifier */ - unsigned precision; + unsigned precision:2; /** * Image qualifiers, applicable to buffer variables defined in shader @@ -873,7 +873,8 @@ struct glsl_struct_field { #ifdef __cplusplus glsl_struct_field(const struct glsl_type *_type, const char *_name) : type(_type), name(_name), location(-1), interpolation(0), centroid(0), - sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0) + sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0), + precision(GLSL_PRECISION_NONE) { /* empty */ } diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 3157ff82d99..79df6d3df94 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -107,6 +107,10 @@ void nir_shader_add_variable(nir_shader *shader, nir_variable *var) { switch (var->data.mode) { + case nir_var_all: + assert(!"invalid mode"); + break; + case nir_var_local: assert(!"nir_shader_add_variable cannot be used for local variables"); break; @@ -312,6 +316,14 @@ nir_block_create(nir_shader *shader) block->predecessors = _mesa_set_create(block, _mesa_hash_pointer, _mesa_key_pointer_equal); block->imm_dom = NULL; + /* XXX maybe it would be worth it to defer allocation? This + * way it doesn't get allocated for shader ref's that never run + * nir_calc_dominance? 
For example, the state-tracker creates an + * initial IR, clones that, runs the appropriate lowering pass, passes + * it to the driver which does common lowering/opt, and then stores a ref + * which is later used to do state-specific lowering and further + * opt. Do any of the references not need dominance metadata? + */ block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer, _mesa_key_pointer_equal); @@ -1306,21 +1318,62 @@ nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src) { assert(!new_src.is_ssa || def != new_src.ssa); - nir_foreach_use_safe(def, use_src) { - nir_instr *src_parent_instr = use_src->parent_instr; - list_del(&use_src->use_link); - nir_src_copy(use_src, &new_src, src_parent_instr); - src_add_all_uses(use_src, src_parent_instr, NULL); - } + nir_foreach_use_safe(def, use_src) + nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); + + nir_foreach_if_use_safe(def, use_src) + nir_if_rewrite_condition(use_src->parent_if, new_src); +} + +static bool +is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between) +{ + assert(start->block == end->block); + + if (between->block != start->block) + return false; + + /* Search backwards looking for "between" */ + while (start != end) { + if (between == end) + return true; - nir_foreach_if_use_safe(def, use_src) { - nir_if *src_parent_if = use_src->parent_if; - list_del(&use_src->use_link); - nir_src_copy(use_src, &new_src, src_parent_if); - src_add_all_uses(use_src, NULL, src_parent_if); + end = nir_instr_prev(end); + assert(end); } + + return false; } +/* Replaces all uses of the given SSA def with the given source but only if + * the use comes after the after_me instruction. This can be useful if you + * are emitting code to fix up the result of some instruction: you can freely + * use the result in that code and then call rewrite_uses_after and pass the + * last fixup instruction as after_me and it will replace all of the uses you + * want without touching the fixup code. + * + * This function assumes that after_me is in the same block as + * def->parent_instr and that after_me comes after def->parent_instr. + */ +void +nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, + nir_instr *after_me) +{ + assert(!new_src.is_ssa || def != new_src.ssa); + + nir_foreach_use_safe(def, use_src) { + assert(use_src->parent_instr != def->parent_instr); + /* Since def already dominates all of its uses, the only way a use can + * not be dominated by after_me is if it is between def and after_me in + * the instruction list.
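+ *
+ * Typical fixup pattern (a sketch; b is a nir_builder positioned
+ * right after def->parent_instr, and bias is any previously defined
+ * value):
+ *
+ *    nir_ssa_def *fixed = nir_fadd(&b, def, bias);
+ *    nir_ssa_def_rewrite_uses_after(def, nir_src_for_ssa(fixed),
+ *                                   fixed->parent_instr);
+ *
+ * The nir_fadd keeps reading the original def while every older use
+ * is redirected to the fixed-up value.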
+ */ + if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr)) + nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); + } + + nir_foreach_if_use_safe(def, use_src) + nir_if_rewrite_condition(use_src->parent_if, new_src); +} static bool foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, bool reverse, void *state); @@ -1571,6 +1624,8 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_tess_level_inner; case SYSTEM_VALUE_VERTICES_IN: return nir_intrinsic_load_patch_vertices_in; + case SYSTEM_VALUE_HELPER_INVOCATION: + return nir_intrinsic_load_helper_invocation; default: unreachable("system value does not directly correspond to intrinsic"); } @@ -1614,6 +1669,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) return SYSTEM_VALUE_TESS_LEVEL_INNER; case nir_intrinsic_load_patch_vertices_in: return SYSTEM_VALUE_VERTICES_IN; + case nir_intrinsic_load_helper_invocation: + return SYSTEM_VALUE_HELPER_INVOCATION; default: unreachable("intrinsic doesn't produce a system value"); } diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index df0e6f1f54a..b7374e17407 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -82,6 +82,7 @@ typedef struct { } nir_state_slot; typedef enum { + nir_var_all = -1, nir_var_shader_in, nir_var_shader_out, nir_var_global, @@ -111,6 +112,11 @@ typedef struct nir_constant { */ union nir_constant_data value; + /* we could get this from the var->type but makes clone *much* easier to + * not have to care about the type. + */ + unsigned num_elements; + /* Array elements / Structure Fields */ struct nir_constant **elements; } nir_constant; @@ -147,19 +153,6 @@ typedef struct { */ char *name; - /** - * For variables which satisfy the is_interface_instance() predicate, this - * points to an array of integers such that if the ith member of the - * interface block is an array, max_ifc_array_access[i] is the maximum - * array element of that member that has been accessed. If the ith member - * of the interface block is not an array, max_ifc_array_access[i] is - * unused. - * - * For variables whose type is not an interface block, this pointer is - * NULL. - */ - unsigned *max_ifc_array_access; - struct nir_variable_data { /** @@ -654,7 +647,7 @@ typedef enum { nir_type_invalid = 0, /* Not a valid type */ nir_type_float, nir_type_int, - nir_type_unsigned, + nir_type_uint, nir_type_bool } nir_alu_type; @@ -977,6 +970,9 @@ typedef enum { nir_texop_tg4, /**< Texture gather */ nir_texop_query_levels, /**< Texture levels query */ nir_texop_texture_samples, /**< Texture samples query */ + nir_texop_samples_identical, /**< Query whether all samples are definitely + * identical. + */ } nir_texop; typedef struct { @@ -1069,6 +1065,7 @@ nir_tex_instr_dest_size(nir_tex_instr *instr) case nir_texop_texture_samples: case nir_texop_query_levels: + case nir_texop_samples_identical: return 1; default: @@ -1079,6 +1076,31 @@ nir_tex_instr_dest_size(nir_tex_instr *instr) } } +/* Returns true if this texture operation queries something about the texture + * rather than actually sampling it. 
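+ *
+ * A lowering pass can use this to skip operations that never read
+ * texel data, e.g. (sketch):
+ *
+ *    if (nir_tex_instr_is_query(tex))
+ *       continue;   /* nothing to swizzle or saturate */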
+ */ +static inline bool +nir_tex_instr_is_query(nir_tex_instr *instr) +{ + switch (instr->op) { + case nir_texop_txs: + case nir_texop_lod: + case nir_texop_texture_samples: + case nir_texop_query_levels: + return true; + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_txl: + case nir_texop_txd: + case nir_texop_txf: + case nir_texop_txf_ms: + case nir_texop_tg4: + return false; + default: + unreachable("Invalid texture opcode"); + } +} + static inline unsigned nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src) { @@ -1353,6 +1375,7 @@ typedef enum { nir_metadata_block_index = 0x1, nir_metadata_dominance = 0x2, nir_metadata_live_ssa_defs = 0x4, + nir_metadata_not_properly_reset = 0x8, } nir_metadata; typedef struct { @@ -1578,6 +1601,11 @@ typedef struct nir_shader_info { struct { unsigned local_size[3]; } cs; + + struct { + /** The number of vertices in the TCS output patch. */ + unsigned vertices_out; + } tcs; }; } nir_shader_info; @@ -1910,6 +1938,8 @@ void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, unsigned num_components, const char *name); void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src); +void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, + nir_instr *after_me); /* visits basic blocks in source-code order */ typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state); @@ -1937,10 +1967,16 @@ void nir_index_blocks(nir_function_impl *impl); void nir_print_shader(nir_shader *shader, FILE *fp); void nir_print_instr(const nir_instr *instr, FILE *fp); +nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s); + #ifdef DEBUG void nir_validate_shader(nir_shader *shader); +void nir_metadata_set_validation_flag(nir_shader *shader); +void nir_metadata_check_validation_flag(nir_shader *shader); #else static inline void nir_validate_shader(nir_shader *shader) { (void) shader; } +static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; } +static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; } #endif /* DEBUG */ void nir_calc_dominance_impl(nir_function_impl *impl); @@ -2032,9 +2068,22 @@ typedef struct nir_lower_tex_options { unsigned saturate_s; unsigned saturate_t; unsigned saturate_r; + + /* Bitmask of samplers that need swizzling. + * + * If (swizzle_result & (1 << sampler_index)), then the swizzle in + * swizzles[sampler_index] is applied to the result of the texturing + * operation. + */ + unsigned swizzle_result; + + /* A swizzle for each sampler. Values 0-3 represent x, y, z, or w swizzles + * while 4 and 5 represent 0 and 1 respectively. + */ + uint8_t swizzles[32][4]; } nir_lower_tex_options; -void nir_lower_tex(nir_shader *shader, +bool nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options); void nir_lower_idiv(nir_shader *shader); diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h index 205aa067b0b..fe41c74b608 100644 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@ -256,7 +256,7 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4], { nir_alu_src alu_src = { NIR_SRC_INIT }; alu_src.src = nir_src_for_ssa(src); - for (int i = 0; i < 4; i++) + for (unsigned i = 0; i < num_components; i++) alu_src.swizzle[i] = swiz[i]; return use_fmov ? 
nir_fmov_alu(build, alu_src, num_components) : @@ -290,6 +290,8 @@ nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c) /** * Turns a nir_src into a nir_ssa_def * so it can be passed to * nir_build_alu()-based builder calls. + * + * See nir_ssa_for_alu_src() for alu instructions. */ static inline nir_ssa_def * nir_ssa_for_src(nir_builder *build, nir_src src, int num_components) @@ -305,6 +307,25 @@ nir_ssa_for_src(nir_builder *build, nir_src src, int num_components) return nir_imov_alu(build, alu, num_components); } +/** + * Similar to nir_ssa_for_src(), but for alu src's, respecting the + * nir_alu_src's swizzle. + */ +static inline nir_ssa_def * +nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr *instr, unsigned srcn) +{ + static uint8_t trivial_swizzle[4] = { 0, 1, 2, 3 }; + nir_alu_src *src = &instr->src[srcn]; + unsigned num_components = nir_ssa_alu_instr_src_components(instr, srcn); + + if (src->src.is_ssa && (src->src.ssa->num_components == num_components) && + !src->abs && !src->negate && + (memcmp(src->swizzle, trivial_swizzle, num_components) == 0)) + return src->src.ssa; + + return nir_imov_alu(build, *src, num_components); +} + static inline nir_ssa_def * nir_load_var(nir_builder *build, nir_variable *var) { diff --git a/src/glsl/nir/nir_clone.c b/src/glsl/nir/nir_clone.c new file mode 100644 index 00000000000..68b72ef5381 --- /dev/null +++ b/src/glsl/nir/nir_clone.c @@ -0,0 +1,674 @@ +/* + * Copyright © 2015 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_control_flow_private.h" + +/* Secret Decoder Ring: + * clone_foo(): + * Allocate and clone a foo. + * __clone_foo(): + * Clone body of foo (ie. parent class, embedded struct, etc) + */ + +typedef struct { + /* maps orig ptr -> cloned ptr: */ + struct hash_table *ptr_table; + + /* List of phi sources. 
*/ + struct list_head phi_srcs; + + /* new shader object, used as memctx for just about everything else: */ + nir_shader *ns; +} clone_state; + +static void +init_clone_state(clone_state *state) +{ + state->ptr_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + list_inithead(&state->phi_srcs); +} + +static void +free_clone_state(clone_state *state) +{ + _mesa_hash_table_destroy(state->ptr_table, NULL); +} + +static void * +lookup_ptr(clone_state *state, const void *ptr) +{ + struct hash_entry *entry; + + if (!ptr) + return NULL; + + entry = _mesa_hash_table_search(state->ptr_table, ptr); + assert(entry && "Failed to find pointer!"); + if (!entry) + return NULL; + + return entry->data; +} + +static void +store_ptr(clone_state *state, void *nptr, const void *ptr) +{ + _mesa_hash_table_insert(state->ptr_table, ptr, nptr); +} + +static nir_constant * +clone_constant(clone_state *state, const nir_constant *c, nir_variable *nvar) +{ + nir_constant *nc = ralloc(nvar, nir_constant); + + nc->value = c->value; + nc->num_elements = c->num_elements; + nc->elements = ralloc_array(nvar, nir_constant *, c->num_elements); + for (unsigned i = 0; i < c->num_elements; i++) { + nc->elements[i] = clone_constant(state, c->elements[i], nvar); + } + + return nc; +} + +/* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid + * having to deal with locals and globals separately: + */ +static nir_variable * +clone_variable(clone_state *state, const nir_variable *var) +{ + nir_variable *nvar = rzalloc(state->ns, nir_variable); + store_ptr(state, nvar, var); + + nvar->type = var->type; + nvar->name = ralloc_strdup(nvar, var->name); + nvar->data = var->data; + nvar->num_state_slots = var->num_state_slots; + nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots); + memcpy(nvar->state_slots, var->state_slots, + var->num_state_slots * sizeof(nir_state_slot)); + if (var->constant_initializer) { + nvar->constant_initializer = + clone_constant(state, var->constant_initializer, nvar); + } + nvar->interface_type = var->interface_type; + + return nvar; +} + +/* clone list of nir_variable: */ +static void +clone_var_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list) +{ + exec_list_make_empty(dst); + foreach_list_typed(nir_variable, var, node, list) { + nir_variable *nvar = clone_variable(state, var); + exec_list_push_tail(dst, &nvar->node); + } +} + +/* NOTE: for cloning nir_register's, bypass nir_global/local_reg_create() + * to avoid having to deal with locals and globals separately: + */ +static nir_register * +clone_register(clone_state *state, const nir_register *reg) +{ + nir_register *nreg = rzalloc(state->ns, nir_register); + store_ptr(state, nreg, reg); + + nreg->num_components = reg->num_components; + nreg->num_array_elems = reg->num_array_elems; + nreg->index = reg->index; + nreg->name = ralloc_strdup(nreg, reg->name); + nreg->is_global = reg->is_global; + nreg->is_packed = reg->is_packed; + + /* reconstructing uses/defs/if_uses handled by nir_instr_insert() */ + list_inithead(&nreg->uses); + list_inithead(&nreg->defs); + list_inithead(&nreg->if_uses); + + return nreg; +} + +/* clone list of nir_register: */ +static void +clone_reg_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list) +{ + exec_list_make_empty(dst); + foreach_list_typed(nir_register, reg, node, list) { + nir_register *nreg = clone_register(state, reg); + exec_list_push_tail(dst, &nreg->node); + } +} + +static void 
+__clone_src(clone_state *state, void *ninstr_or_if, + nir_src *nsrc, const nir_src *src) +{ + nsrc->is_ssa = src->is_ssa; + if (src->is_ssa) { + nsrc->ssa = lookup_ptr(state, src->ssa); + } else { + nsrc->reg.reg = lookup_ptr(state, src->reg.reg); + if (src->reg.indirect) { + nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src); + __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect); + } + nsrc->reg.base_offset = src->reg.base_offset; + } +} + +static void +__clone_dst(clone_state *state, nir_instr *ninstr, + nir_dest *ndst, const nir_dest *dst) +{ + ndst->is_ssa = dst->is_ssa; + if (dst->is_ssa) { + nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name); + store_ptr(state, &ndst->ssa, &dst->ssa); + } else { + ndst->reg.reg = lookup_ptr(state, dst->reg.reg); + if (dst->reg.indirect) { + ndst->reg.indirect = ralloc(ninstr, nir_src); + __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect); + } + ndst->reg.base_offset = dst->reg.base_offset; + } +} + +static nir_deref *clone_deref(clone_state *state, const nir_deref *deref, + nir_instr *ninstr, nir_deref *parent); + +static nir_deref_var * +clone_deref_var(clone_state *state, const nir_deref_var *dvar, + nir_instr *ninstr) +{ + nir_variable *nvar = lookup_ptr(state, dvar->var); + nir_deref_var *ndvar = nir_deref_var_create(ninstr, nvar); + + if (dvar->deref.child) + ndvar->deref.child = clone_deref(state, dvar->deref.child, + ninstr, &ndvar->deref); + + return ndvar; +} + +static nir_deref_array * +clone_deref_array(clone_state *state, const nir_deref_array *darr, + nir_instr *ninstr, nir_deref *parent) +{ + nir_deref_array *ndarr = nir_deref_array_create(parent); + + ndarr->deref.type = darr->deref.type; + if (darr->deref.child) + ndarr->deref.child = clone_deref(state, darr->deref.child, + ninstr, &ndarr->deref); + + ndarr->deref_array_type = darr->deref_array_type; + ndarr->base_offset = darr->base_offset; + if (ndarr->deref_array_type == nir_deref_array_type_indirect) + __clone_src(state, ninstr, &ndarr->indirect, &darr->indirect); + + return ndarr; +} + +static nir_deref_struct * +clone_deref_struct(clone_state *state, const nir_deref_struct *dstr, + nir_instr *ninstr, nir_deref *parent) +{ + nir_deref_struct *ndstr = nir_deref_struct_create(parent, dstr->index); + + ndstr->deref.type = dstr->deref.type; + if (dstr->deref.child) + ndstr->deref.child = clone_deref(state, dstr->deref.child, + ninstr, &ndstr->deref); + + return ndstr; +} + +static nir_deref * +clone_deref(clone_state *state, const nir_deref *dref, + nir_instr *ninstr, nir_deref *parent) +{ + switch (dref->deref_type) { + case nir_deref_type_array: + return &clone_deref_array(state, nir_deref_as_array(dref), + ninstr, parent)->deref; + case nir_deref_type_struct: + return &clone_deref_struct(state, nir_deref_as_struct(dref), + ninstr, parent)->deref; + default: + unreachable("bad deref type"); + return NULL; + } +} + +static nir_alu_instr * +clone_alu(clone_state *state, const nir_alu_instr *alu) +{ + nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op); + + __clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest); + nalu->dest.saturate = alu->dest.saturate; + nalu->dest.write_mask = alu->dest.write_mask; + + for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { + __clone_src(state, &nalu->instr, &nalu->src[i].src, &alu->src[i].src); + nalu->src[i].negate = alu->src[i].negate; + nalu->src[i].abs = alu->src[i].abs; + memcpy(nalu->src[i].swizzle, alu->src[i].swizzle, + 
sizeof(nalu->src[i].swizzle)); + } + + return nalu; +} + +static nir_intrinsic_instr * +clone_intrinsic(clone_state *state, const nir_intrinsic_instr *itr) +{ + nir_intrinsic_instr *nitr = + nir_intrinsic_instr_create(state->ns, itr->intrinsic); + + unsigned num_variables = nir_intrinsic_infos[itr->intrinsic].num_variables; + unsigned num_srcs = nir_intrinsic_infos[itr->intrinsic].num_srcs; + + if (nir_intrinsic_infos[itr->intrinsic].has_dest) + __clone_dst(state, &nitr->instr, &nitr->dest, &itr->dest); + + nitr->num_components = itr->num_components; + memcpy(nitr->const_index, itr->const_index, sizeof(nitr->const_index)); + + for (unsigned i = 0; i < num_variables; i++) { + nitr->variables[i] = clone_deref_var(state, itr->variables[i], + &nitr->instr); + } + + for (unsigned i = 0; i < num_srcs; i++) + __clone_src(state, &nitr->instr, &nitr->src[i], &itr->src[i]); + + return nitr; +} + +static nir_load_const_instr * +clone_load_const(clone_state *state, const nir_load_const_instr *lc) +{ + nir_load_const_instr *nlc = + nir_load_const_instr_create(state->ns, lc->def.num_components); + + memcpy(&nlc->value, &lc->value, sizeof(nlc->value)); + + store_ptr(state, &nlc->def, &lc->def); + + return nlc; +} + +static nir_ssa_undef_instr * +clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa) +{ + nir_ssa_undef_instr *nsa = + nir_ssa_undef_instr_create(state->ns, sa->def.num_components); + + store_ptr(state, &nsa->def, &sa->def); + + return nsa; +} + +static nir_tex_instr * +clone_tex(clone_state *state, const nir_tex_instr *tex) +{ + nir_tex_instr *ntex = nir_tex_instr_create(state->ns, tex->num_srcs); + + ntex->sampler_dim = tex->sampler_dim; + ntex->dest_type = tex->dest_type; + ntex->op = tex->op; + __clone_dst(state, &ntex->instr, &ntex->dest, &tex->dest); + for (unsigned i = 0; i < ntex->num_srcs; i++) { + ntex->src[i].src_type = tex->src[i].src_type; + __clone_src(state, &ntex->instr, &ntex->src[i].src, &tex->src[i].src); + } + ntex->coord_components = tex->coord_components; + ntex->is_array = tex->is_array; + ntex->is_shadow = tex->is_shadow; + ntex->is_new_style_shadow = tex->is_new_style_shadow; + memcpy(ntex->const_offset, tex->const_offset, sizeof(ntex->const_offset)); + ntex->component = tex->component; + ntex->texture_index = tex->texture_index; + ntex->texture_array_size = tex->texture_array_size; + if (tex->texture) + ntex->texture = clone_deref_var(state, tex->texture, &ntex->instr); + ntex->sampler_index = tex->sampler_index; + if (tex->sampler) + ntex->sampler = clone_deref_var(state, tex->sampler, &ntex->instr); + + return ntex; +} + +static nir_phi_instr * +clone_phi(clone_state *state, const nir_phi_instr *phi, nir_block *nblk) +{ + nir_phi_instr *nphi = nir_phi_instr_create(state->ns); + + __clone_dst(state, &nphi->instr, &nphi->dest, &phi->dest); + + /* Cloning a phi node is a bit different from other instructions. The + * sources of phi instructions are the only time where we can use an SSA + * def before it is defined. In order to handle this, we just copy over + * the sources from the old phi instruction directly and then fix them up + * in a second pass once all the instructions in the function have been + * properly cloned. + * + * In order to ensure that the copied sources (which are the same as the + * old phi instruction's sources for now) don't get inserted into the old + * shader's use-def lists, we have to add the phi instruction *before* we + * set up its sources.
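+ *
+ * For example, with a loop like this (an illustrative nir_print-style
+ * fragment, not taken from any real shader):
+ *
+ *    block b1:  vec1 ssa_2 = phi b0: ssa_1, b2: ssa_5
+ *    ...
+ *    block b2:  vec1 ssa_5 = fadd ssa_2, ssa_4
+ *
+ * ssa_5 is cloned only after the phi that consumes it, so the phi's b2
+ * source cannot be remapped until the whole function has been cloned.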
+ */ + nir_instr_insert_after_block(nblk, &nphi->instr); + + foreach_list_typed(nir_phi_src, src, node, &phi->srcs) { + nir_phi_src *nsrc = ralloc(nphi, nir_phi_src); + + /* Just copy the old source for now. */ + memcpy(nsrc, src, sizeof(*src)); + + /* Since we're not letting nir_instr_insert handle use/def stuff for us, + * we have to set the parent_instr manually. It doesn't really matter + * when we do it, so we might as well do it here. + */ + nsrc->src.parent_instr = &nphi->instr; + + /* Stash it in the list of phi sources. We'll walk this list and fix up + * sources at the very end of clone_function_impl. + */ + list_add(&nsrc->src.use_link, &state->phi_srcs); + + exec_list_push_tail(&nphi->srcs, &nsrc->node); + } + + return nphi; +} + +static nir_jump_instr * +clone_jump(clone_state *state, const nir_jump_instr *jmp) +{ + nir_jump_instr *njmp = nir_jump_instr_create(state->ns, jmp->type); + + return njmp; +} + +static nir_call_instr * +clone_call(clone_state *state, const nir_call_instr *call) +{ + nir_function_overload *ncallee = lookup_ptr(state, call->callee); + nir_call_instr *ncall = nir_call_instr_create(state->ns, ncallee); + + for (unsigned i = 0; i < ncall->num_params; i++) + ncall->params[i] = clone_deref_var(state, call->params[i], &ncall->instr); + + ncall->return_deref = clone_deref_var(state, call->return_deref, + &ncall->instr); + + return ncall; +} + +static nir_instr * +clone_instr(clone_state *state, const nir_instr *instr) +{ + switch (instr->type) { + case nir_instr_type_alu: + return &clone_alu(state, nir_instr_as_alu(instr))->instr; + case nir_instr_type_intrinsic: + return &clone_intrinsic(state, nir_instr_as_intrinsic(instr))->instr; + case nir_instr_type_load_const: + return &clone_load_const(state, nir_instr_as_load_const(instr))->instr; + case nir_instr_type_ssa_undef: + return &clone_ssa_undef(state, nir_instr_as_ssa_undef(instr))->instr; + case nir_instr_type_tex: + return &clone_tex(state, nir_instr_as_tex(instr))->instr; + case nir_instr_type_phi: + unreachable("Cannot clone phis with clone_instr"); + case nir_instr_type_jump: + return &clone_jump(state, nir_instr_as_jump(instr))->instr; + case nir_instr_type_call: + return &clone_call(state, nir_instr_as_call(instr))->instr; + case nir_instr_type_parallel_copy: + unreachable("Cannot clone parallel copies"); + default: + unreachable("bad instr type"); + return NULL; + } +} + +static nir_block * +clone_block(clone_state *state, struct exec_list *cf_list, const nir_block *blk) +{ + /* Don't actually create a new block. Just use the one from the tail of + * the list. NIR guarantees that the tail of the list is a block and that + * no two blocks are side-by-side in the IR; it should be empty. + */ + nir_block *nblk = + exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node); + assert(nblk->cf_node.type == nir_cf_node_block); + assert(exec_list_is_empty(&nblk->instr_list)); + + /* We need this for phi sources */ + store_ptr(state, nblk, blk); + + nir_foreach_instr(blk, instr) { + if (instr->type == nir_instr_type_phi) { + /* Phi instructions are a bit of a special case when cloning because + * we don't want inserting the instruction to automatically handle + * use/defs for us. Instead, we need to wait until all the + * blocks/instructions are in so that we can set their sources up.
+ */ + clone_phi(state, nir_instr_as_phi(instr), nblk); + } else { + nir_instr *ninstr = clone_instr(state, instr); + nir_instr_insert_after_block(nblk, ninstr); + } + } + + return nblk; +} + +static void +clone_cf_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list); + +static nir_if * +clone_if(clone_state *state, struct exec_list *cf_list, const nir_if *i) +{ + nir_if *ni = nir_if_create(state->ns); + + __clone_src(state, ni, &ni->condition, &i->condition); + + nir_cf_node_insert_end(cf_list, &ni->cf_node); + + clone_cf_list(state, &ni->then_list, &i->then_list); + clone_cf_list(state, &ni->else_list, &i->else_list); + + return ni; +} + +static nir_loop * +clone_loop(clone_state *state, struct exec_list *cf_list, const nir_loop *loop) +{ + nir_loop *nloop = nir_loop_create(state->ns); + + nir_cf_node_insert_end(cf_list, &nloop->cf_node); + + clone_cf_list(state, &nloop->body, &loop->body); + + return nloop; +} + +/* clone list of nir_cf_node: */ +static void +clone_cf_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list) +{ + foreach_list_typed(nir_cf_node, cf, node, list) { + switch (cf->type) { + case nir_cf_node_block: + clone_block(state, dst, nir_cf_node_as_block(cf)); + break; + case nir_cf_node_if: + clone_if(state, dst, nir_cf_node_as_if(cf)); + break; + case nir_cf_node_loop: + clone_loop(state, dst, nir_cf_node_as_loop(cf)); + break; + default: + unreachable("bad cf type"); + } + } +} + +static nir_function_impl * +clone_function_impl(clone_state *state, const nir_function_impl *fi, + nir_function_overload *nfo) +{ + nir_function_impl *nfi = nir_function_impl_create(nfo); + + clone_var_list(state, &nfi->locals, &fi->locals); + clone_reg_list(state, &nfi->registers, &fi->registers); + nfi->reg_alloc = fi->reg_alloc; + + nfi->num_params = fi->num_params; + nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params); + for (unsigned i = 0; i < fi->num_params; i++) { + nfi->params[i] = lookup_ptr(state, fi->params[i]); + } + nfi->return_var = lookup_ptr(state, fi->return_var); + + assert(list_empty(&state->phi_srcs)); + + clone_cf_list(state, &nfi->body, &fi->body); + + /* After we've cloned almost everything, we have to walk the list of phi + * sources and fix them up. Thanks to loops, the block and SSA value for a + * phi source may not be defined when we first encounter it. Instead, we + * add it to the phi_srcs list and we fix it up here. + */ + list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) { + src->pred = lookup_ptr(state, src->pred); + assert(src->src.is_ssa); + src->src.ssa = lookup_ptr(state, src->src.ssa); + + /* Remove from this list and place in the uses of the SSA def */ + list_del(&src->src.use_link); + list_addtail(&src->src.use_link, &src->src.ssa->uses); + } + assert(list_empty(&state->phi_srcs)); + + /* All metadata is invalidated in the cloning process */ + nfi->valid_metadata = 0; + + return nfi; +} + +static nir_function_overload * +clone_function_overload(clone_state *state, const nir_function_overload *fo, + nir_function *nfxn) +{ + nir_function_overload *nfo = nir_function_overload_create(nfxn); + + /* Needed for call instructions */ + store_ptr(state, nfo, fo); + + nfo->num_params = fo->num_params; + nfo->params = ralloc_array(state->ns, nir_parameter, fo->num_params); + memcpy(nfo->params, fo->params, sizeof(nir_parameter) * fo->num_params); + + nfo->return_type = fo->return_type; + + /* At first glance, it looks like we should clone the function_impl here. 
+ * However, call instructions need to be able to reference at least the + * overload, and those will get processed as we clone the function_impls. + * We stop here and do function_impls as a second pass. + */ + + return nfo; +} + +static nir_function * +clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns) +{ + assert(ns == state->ns); + nir_function *nfxn = nir_function_create(ns, fxn->name); + + foreach_list_typed(nir_function_overload, fo, node, &fxn->overload_list) + clone_function_overload(state, fo, nfxn); + + return nfxn; +} + +nir_shader * +nir_shader_clone(void *mem_ctx, const nir_shader *s) +{ + clone_state state; + init_clone_state(&state); + + nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options); + state.ns = ns; + + clone_var_list(&state, &ns->uniforms, &s->uniforms); + clone_var_list(&state, &ns->inputs, &s->inputs); + clone_var_list(&state, &ns->outputs, &s->outputs); + clone_var_list(&state, &ns->globals, &s->globals); + clone_var_list(&state, &ns->system_values, &s->system_values); + + /* Go through and clone functions and overloads */ + foreach_list_typed(nir_function, fxn, node, &s->functions) + clone_function(&state, fxn, ns); + + /* Only after all overloads are cloned can we clone the actual function + * implementations. This is because nir_call_instrs need to reference the + * overloads of other functions and we don't know what order the functions + * will have in the list. + */ + nir_foreach_overload(s, fo) { + nir_function_overload *nfo = lookup_ptr(&state, fo); + clone_function_impl(&state, fo->impl, nfo); + } + + clone_reg_list(&state, &ns->registers, &s->registers); + ns->reg_alloc = s->reg_alloc; + + ns->info = s->info; + ns->info.name = ralloc_strdup(ns, ns->info.name); + if (ns->info.label) + ns->info.label = ralloc_strdup(ns, ns->info.label); + + ns->num_inputs = s->num_inputs; + ns->num_uniforms = s->num_uniforms; + ns->num_outputs = s->num_outputs; + + free_clone_state(&state); + + return ns; +} diff --git a/src/glsl/nir/nir_constant_expressions.py b/src/glsl/nir/nir_constant_expressions.py index 2ba8554645d..b16ef503c92 100644 --- a/src/glsl/nir/nir_constant_expressions.py +++ b/src/glsl/nir/nir_constant_expressions.py @@ -213,7 +213,7 @@ unpack_half_1x16(uint16_t u) } /* Some typed vector structures to make things like src0.y work */ -% for type in ["float", "int", "unsigned", "bool"]: +% for type in ["float", "int", "uint", "bool"]: struct ${type}_vec { ${type} x; ${type} y; diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index 0a134aff211..de30db61eea 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -244,6 +244,7 @@ SYSTEM_VALUE(local_invocation_id, 3, 0) SYSTEM_VALUE(work_group_id, 3, 0) SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */ SYSTEM_VALUE(num_work_groups, 3, 0) +SYSTEM_VALUE(helper_invocation, 1, 0) /* * The format of the indices depends on the type of the load.
For uniforms, diff --git a/src/glsl/nir/nir_lower_clip.c b/src/glsl/nir/nir_lower_clip.c index 31ccfb2c02b..c58c7785b3f 100644 --- a/src/glsl/nir/nir_lower_clip.c +++ b/src/glsl/nir/nir_lower_clip.c @@ -55,9 +55,11 @@ create_clipdist_var(nir_shader *shader, unsigned drvloc, if (output) { exec_list_push_tail(&shader->outputs, &var->node); + shader->num_outputs++; /* TODO use type_size() */ } else { exec_list_push_tail(&shader->inputs, &var->node); + shader->num_inputs++; /* TODO use type_size() */ } return var; } diff --git a/src/glsl/nir/nir_lower_idiv.c b/src/glsl/nir/nir_lower_idiv.c index c961178c53a..f64b3eac8a0 100644 --- a/src/glsl/nir/nir_lower_idiv.c +++ b/src/glsl/nir/nir_lower_idiv.c @@ -52,10 +52,8 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu) bld->cursor = nir_before_instr(&alu->instr); - numer = nir_ssa_for_src(bld, alu->src[0].src, - nir_ssa_alu_instr_src_components(alu, 0)); - denom = nir_ssa_for_src(bld, alu->src[1].src, - nir_ssa_alu_instr_src_components(alu, 1)); + numer = nir_ssa_for_alu_src(bld, alu, 0); + denom = nir_ssa_for_alu_src(bld, alu, 1); if (is_signed) { af = nir_i2f(bld, numer); @@ -96,7 +94,7 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu) r = nir_imul(bld, q, b); r = nir_isub(bld, a, r); - r = nir_ige(bld, r, b); + r = nir_uge(bld, r, b); r = nir_b2i(bld, r); q = nir_iadd(bld, q, r); diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index 00a31458310..5683e69d865 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -197,7 +197,7 @@ nir_lower_io_block(nir_block *block, void *void_state) nir_variable_mode mode = intrin->variables[0]->var->data.mode; - if (state->mode != -1 && state->mode != mode) + if (state->mode != nir_var_all && state->mode != mode) continue; if (mode != nir_var_shader_in && diff --git a/src/glsl/nir/nir_lower_tex.c b/src/glsl/nir/nir_lower_tex.c index 8aaa48ab568..93ebf8e78a9 100644 --- a/src/glsl/nir/nir_lower_tex.c +++ b/src/glsl/nir/nir_lower_tex.c @@ -41,6 +41,7 @@ typedef struct { nir_builder b; const nir_lower_tex_options *options; + bool progress; } lower_tex_state; static void @@ -133,6 +134,7 @@ get_texture_size(nir_builder *b, nir_tex_instr *tex) txs->op = nir_texop_txs; txs->sampler_dim = GLSL_SAMPLER_DIM_RECT; txs->sampler_index = tex->sampler_index; + txs->dest_type = nir_type_int; /* only single src, the lod: */ txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0)); @@ -213,6 +215,66 @@ saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask) } } +static nir_ssa_def * +get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val) +{ + nir_const_value v; + + memset(&v, 0, sizeof(v)); + + if (swizzle_val == 4) { + v.u[0] = v.u[1] = v.u[2] = v.u[3] = 0; + } else { + assert(swizzle_val == 5); + if (type == nir_type_float) + v.f[0] = v.f[1] = v.f[2] = v.f[3] = 1.0; + else + v.u[0] = v.u[1] = v.u[2] = v.u[3] = 1; + } + + return nir_build_imm(b, 4, v); +} + +static void +swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4]) +{ + assert(tex->dest.is_ssa); + + b->cursor = nir_after_instr(&tex->instr); + + nir_ssa_def *swizzled; + if (tex->op == nir_texop_tg4) { + if (swizzle[tex->component] < 4) { + /* This one's easy */ + tex->component = swizzle[tex->component]; + return; + } else { + swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]); + } + } else { + assert(nir_tex_instr_dest_size(tex) == 4); + if (swizzle[0] < 4 && swizzle[1] < 4 && + swizzle[2] < 4 && swizzle[3] < 4) { + unsigned swiz[4] = { swizzle[0], 
swizzle[1], swizzle[2], swizzle[3] }; + /* We have no 0's or 1's, just emit a swizzling MOV */ + swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false); + } else { + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < 4; i++) { + if (swizzle[i] < 4) { + srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]); + } else { + srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]); + } + } + swizzled = nir_vec(b, srcs, 4); + } + } + + nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled), + swizzled->parent_instr); +} + static bool nir_lower_tex_block(nir_block *block, void *void_state) { @@ -239,15 +301,28 @@ nir_lower_tex_block(nir_block *block, void *void_state) /* If we are clamping any coords, we must lower projector first * as clamping happens *after* projection: */ - if (lower_txp || sat_mask) + if (lower_txp || sat_mask) { project_src(b, tex); + state->progress = true; + } if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && - state->options->lower_rect) + state->options->lower_rect) { lower_rect(b, tex); + state->progress = true; + } - if (sat_mask) + if (sat_mask) { saturate_src(b, tex, sat_mask); + state->progress = true; + } + + if (((1 << tex->sampler_index) & state->options->swizzle_result) && + !nir_tex_instr_is_query(tex) && + !(tex->is_shadow && tex->is_new_style_shadow)) { + swizzle_result(b, tex, state->options->swizzles[tex->sampler_index]); + state->progress = true; + } } return true; @@ -264,13 +339,17 @@ nir_lower_tex_impl(nir_function_impl *impl, lower_tex_state *state) nir_metadata_dominance); } -void +bool nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options) { lower_tex_state state; state.options = options; + state.progress = false; + nir_foreach_overload(shader, overload) { if (overload->impl) nir_lower_tex_impl(overload->impl, &state); } + + return state.progress; } diff --git a/src/glsl/nir/nir_lower_two_sided_color.c b/src/glsl/nir/nir_lower_two_sided_color.c index db519bf513b..6995b9d6bc1 100644 --- a/src/glsl/nir/nir_lower_two_sided_color.c +++ b/src/glsl/nir/nir_lower_two_sided_color.c @@ -60,6 +60,8 @@ create_input(nir_shader *shader, unsigned drvloc, gl_varying_slot slot) exec_list_push_tail(&shader->inputs, &var->node); + shader->num_inputs++; /* TODO use type_size() */ + return var; } diff --git a/src/glsl/nir/nir_metadata.c b/src/glsl/nir/nir_metadata.c index 6de981f430f..d5324b35a78 100644 --- a/src/glsl/nir/nir_metadata.c +++ b/src/glsl/nir/nir_metadata.c @@ -52,3 +52,39 @@ nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved) { impl->valid_metadata &= preserved; } + +#ifdef DEBUG +/** + * Make sure passes properly invalidate metadata (part 1). + * + * Call this before running a pass to set a bogus metadata flag, which will + * only be preserved if the pass forgets to call nir_metadata_preserve(). + */ +void +nir_metadata_set_validation_flag(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) { + overload->impl->valid_metadata |= nir_metadata_not_properly_reset; + } + } +} + +/** + * Make sure passes properly invalidate metadata (part 2). + * + * Call this after a pass makes progress to verify that the bogus metadata set by + * the earlier function was properly thrown away. Note that passes may not call + * nir_metadata_preserve() if they don't actually make any changes at all. 
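+ *
+ * A debug-build pass loop might use the two helpers like this (a sketch;
+ * nir_opt_some_pass and its progress return are hypothetical):
+ *
+ *    nir_metadata_set_validation_flag(shader);
+ *    if (nir_opt_some_pass(shader))
+ *       nir_metadata_check_validation_flag(shader);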
+ */ +void +nir_metadata_check_validation_flag(nir_shader *shader) +{ + nir_foreach_overload(shader, overload) { + if (overload->impl) { + assert(!(overload->impl->valid_metadata & + nir_metadata_not_properly_reset)); + } + } +} +#endif diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py index 3c0f1da94af..37d3dfc4588 100644 --- a/src/glsl/nir/nir_opcodes.py +++ b/src/glsl/nir/nir_opcodes.py @@ -91,7 +91,7 @@ class Opcode(object): tfloat = "float" tint = "int" tbool = "bool" -tunsigned = "unsigned" +tuint = "uint" commutative = "commutative " associative = "associative " @@ -156,7 +156,7 @@ unop("fsqrt", tfloat, "sqrtf(src0)") unop("fexp2", tfloat, "exp2f(src0)") unop("flog2", tfloat, "log2f(src0)") unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion. -unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion +unop_convert("f2u", tfloat, tuint, "src0") # Float-to-unsigned conversion unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion. # Float-to-boolean conversion unop_convert("f2b", tfloat, tbool, "src0 != 0.0f") @@ -165,7 +165,7 @@ unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f") # Int-to-boolean conversion unop_convert("i2b", tint, tbool, "src0 != 0") unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion -unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion. +unop_convert("u2f", tuint, tfloat, "src0") # Unsigned-to-float conversion. unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}") unop_reduce("ball", 1, tbool, tbool, "{src}", "{src0} && {src1}", "{src}") @@ -205,13 +205,13 @@ unop("fddy_coarse", tfloat, "0.0f") # Floating point pack and unpack operations. def pack_2x16(fmt): - unop_horiz("pack_" + fmt + "_2x16", 1, tunsigned, 2, tfloat, """ + unop_horiz("pack_" + fmt + "_2x16", 1, tuint, 2, tfloat, """ dst.x = (uint32_t) pack_fmt_1x16(src0.x); dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16; """.replace("fmt", fmt)) def pack_4x8(fmt): - unop_horiz("pack_" + fmt + "_4x8", 1, tunsigned, 4, tfloat, """ + unop_horiz("pack_" + fmt + "_4x8", 1, tuint, 4, tfloat, """ dst.x = (uint32_t) pack_fmt_1x8(src0.x); dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8; dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16; @@ -219,13 +219,13 @@ dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24; """.replace("fmt", fmt)) def unpack_2x16(fmt): - unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tunsigned, """ + unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tuint, """ dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff)); dst.y = unpack_fmt_1x16((uint16_t)(src0.x << 16)); """.replace("fmt", fmt)) def unpack_4x8(fmt): - unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tunsigned, """ + unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tuint, """ dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff)); dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff)); dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff)); @@ -248,22 +248,22 @@ unpack_2x16("half") # Lowered floating point unpacking operations. -unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tunsigned, +unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tuint, "unpack_half_1x16((uint16_t)(src0.x & 0xffff))") -unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tunsigned, +unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tuint, "unpack_half_1x16((uint16_t)(src0.x >> 16))") # Bit operations, part of ARB_gpu_shader5. 
-unop("bitfield_reverse", tunsigned, """ +unop("bitfield_reverse", tuint, """ /* we're not winning any awards for speed here, but that's ok */ dst = 0; for (unsigned bit = 0; bit < 32; bit++) dst |= ((src0 >> bit) & 1) << (31 - bit); """) -unop("bit_count", tunsigned, """ +unop("bit_count", tuint, """ dst = 0; for (unsigned bit = 0; bit < 32; bit++) { if ((src0 >> bit) & 1) @@ -271,7 +271,7 @@ for (unsigned bit = 0; bit < 32; bit++) { } """) -unop_convert("ufind_msb", tunsigned, tint, """ +unop_convert("ufind_msb", tuint, tint, """ dst = -1; for (int bit = 31; bit > 0; bit--) { if ((src0 >> bit) & 1) { @@ -358,25 +358,25 @@ binop("imul", tint, commutative + associative, "src0 * src1") binop("imul_high", tint, commutative, "(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)") # high 32-bits of unsigned integer multiply -binop("umul_high", tunsigned, commutative, +binop("umul_high", tuint, commutative, "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)") binop("fdiv", tfloat, "", "src0 / src1") binop("idiv", tint, "", "src0 / src1") -binop("udiv", tunsigned, "", "src0 / src1") +binop("udiv", tuint, "", "src0 / src1") # returns a boolean representing the carry resulting from the addition of # the two unsigned arguments. -binop_convert("uadd_carry", tbool, tunsigned, commutative, "src0 + src1 < src0") +binop_convert("uadd_carry", tbool, tuint, commutative, "src0 + src1 < src0") # returns a boolean representing the borrow resulting from the subtraction # of the two unsigned arguments. -binop_convert("usub_borrow", tbool, tunsigned, "", "src1 < src0") +binop_convert("usub_borrow", tbool, tuint, "", "src1 < src0") binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)") -binop("umod", tunsigned, "", "src1 == 0 ? 0 : src0 % src1") +binop("umod", tuint, "", "src1 == 0 ? 0 : src0 % src1") # # Comparisons @@ -393,8 +393,8 @@ binop_compare("ilt", tint, "", "src0 < src1") binop_compare("ige", tint, "", "src0 >= src1") binop_compare("ieq", tint, commutative, "src0 == src1") binop_compare("ine", tint, commutative, "src0 != src1") -binop_compare("ult", tunsigned, "", "src0 < src1") -binop_compare("uge", tunsigned, "", "src0 >= src1") +binop_compare("ult", tuint, "", "src0 < src1") +binop_compare("uge", tuint, "", "src0 >= src1") # integer-aware GLSL-style comparisons that compare floats and ints @@ -425,7 +425,7 @@ binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not E binop("ishl", tint, "", "src0 << src1") binop("ishr", tint, "", "src0 >> src1") -binop("ushr", tunsigned, "", "src0 >> src1") +binop("ushr", tuint, "", "src0 >> src1") # bitwise logic operators # @@ -433,9 +433,9 @@ binop("ushr", tunsigned, "", "src0 >> src1") # integers. -binop("iand", tunsigned, commutative + associative, "src0 & src1") -binop("ior", tunsigned, commutative + associative, "src0 | src1") -binop("ixor", tunsigned, commutative + associative, "src0 ^ src1") +binop("iand", tuint, commutative + associative, "src0 & src1") +binop("ior", tuint, commutative + associative, "src0 | src1") +binop("ixor", tuint, commutative + associative, "src0 ^ src1") # floating point logic operators @@ -463,10 +463,10 @@ opcode("fdph_replicated", 4, tfloat, [3, 4], [tfloat, tfloat], "", binop("fmin", tfloat, "", "fminf(src0, src1)") binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1") -binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1") +binop("umin", tuint, commutative + associative, "src1 > src0 ? 
src0 : src1") binop("fmax", tfloat, "", "fmaxf(src0, src1)") binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0") -binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0") +binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0") # Saturated vector add for 4 8bit ints. binop("usadd_4x8", tint, commutative + associative, """ @@ -515,10 +515,10 @@ for (int i = 0; i < 32; i += 8) { binop("fpow", tfloat, "", "powf(src0, src1)") -binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat, +binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat, "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)") -binop_convert("bfm", tunsigned, tint, "", """ +binop_convert("bfm", tuint, tint, "", """ int offset = src0, bits = src1; if (offset < 0 || bits < 0 || offset + bits > 32) dst = 0; /* undefined per the spec */ @@ -535,7 +535,7 @@ if (!isnormal(dst)) # Combines the first component of each input to make a 2-component vector. -binop_horiz("vec2", 2, tunsigned, 1, tunsigned, 1, tunsigned, """ +binop_horiz("vec2", 2, tuint, 1, tuint, 1, tuint, """ dst.x = src0.x; dst.y = src1.x; """) @@ -543,9 +543,9 @@ dst.y = src1.x; def triop(name, ty, const_expr): opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr) def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr): - opcode(name, output_size, tunsigned, + opcode(name, output_size, tuint, [src1_size, src2_size, src3_size], - [tunsigned, tunsigned, tunsigned], "", const_expr) + [tuint, tuint, tuint], "", const_expr) triop("ffma", tfloat, "src0 * src1 + src2") @@ -559,11 +559,11 @@ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2") triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2") -opcode("bcsel", 0, tunsigned, [0, 0, 0], - [tbool, tunsigned, tunsigned], "", "src0 ? src1 : src2") +opcode("bcsel", 0, tuint, [0, 0, 0], + [tbool, tuint, tuint], "", "src0 ? 
src1 : src2") -triop("bfi", tunsigned, """ -unsigned mask = src0, insert = src1 & mask, base = src2; +triop("bfi", tuint, """ +unsigned mask = src0, insert = src1, base = src2; if (mask == 0) { dst = base; } else { @@ -572,12 +572,12 @@ if (mask == 0) { tmp >>= 1; insert <<= 1; } - dst = (base & ~mask) | insert; + dst = (base & ~mask) | (insert & mask); } """) -opcode("ubitfield_extract", 0, tunsigned, - [0, 1, 1], [tunsigned, tint, tint], "", """ +opcode("ubitfield_extract", 0, tuint, + [0, 1, 1], [tuint, tint, tint], "", """ unsigned base = src0; int offset = src1.x, bits = src2.x; if (bits == 0) { @@ -611,13 +611,13 @@ dst.z = src2.x; def quadop_horiz(name, output_size, src1_size, src2_size, src3_size, src4_size, const_expr): - opcode(name, output_size, tunsigned, + opcode(name, output_size, tuint, [src1_size, src2_size, src3_size, src4_size], - [tunsigned, tunsigned, tunsigned, tunsigned], + [tuint, tuint, tuint, tuint], "", const_expr) -opcode("bitfield_insert", 0, tunsigned, [0, 0, 1, 1], - [tunsigned, tunsigned, tint, tint], "", """ +opcode("bitfield_insert", 0, tuint, [0, 0, 1, 1], + [tuint, tuint, tint, tint], "", """ unsigned base = src0, insert = src1; int offset = src2.x, bits = src3.x; if (bits == 0) { diff --git a/src/glsl/nir/nir_opt_copy_propagate.c b/src/glsl/nir/nir_opt_copy_propagate.c index 7d8bdd7f2ca..cfc8e331128 100644 --- a/src/glsl/nir/nir_opt_copy_propagate.c +++ b/src/glsl/nir/nir_opt_copy_propagate.c @@ -55,10 +55,15 @@ static bool is_move(nir_alu_instr *instr) static bool is_vec(nir_alu_instr *instr) { - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { if (!instr->src[i].src.is_ssa) return false; + /* we handle modifiers in a separate pass */ + if (instr->src[i].abs || instr->src[i].negate) + return false; + } + return instr->op == nir_op_vec2 || instr->op == nir_op_vec3 || instr->op == nir_op_vec4; diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c index 2db209d434d..76bfc47c2a0 100644 --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@ -512,7 +512,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state) case nir_texop_texture_samples: fprintf(fp, "texture_samples "); break; - + case nir_texop_samples_identical: + fprintf(fp, "samples_identical "); + break; default: unreachable("Invalid texture operation"); break; @@ -985,6 +987,16 @@ nir_print_shader(nir_shader *shader, FILE *fp) fprintf(fp, "shader: %s\n", gl_shader_stage_name(shader->stage)); + if (shader->info.name) + fprintf(fp, "name: %s\n", shader->info.name); + + if (shader->info.label) + fprintf(fp, "label: %s\n", shader->info.label); + + fprintf(fp, "inputs: %u\n", shader->num_inputs); + fprintf(fp, "outputs: %u\n", shader->num_outputs); + fprintf(fp, "uniforms: %u\n", shader->num_uniforms); + nir_foreach_variable(var, &shader->uniforms) { print_var_decl(var, &state); } diff --git a/src/glsl/nir/nir_search.c b/src/glsl/nir/nir_search.c index bb154407914..56d7e8162f3 100644 --- a/src/glsl/nir/nir_search.c +++ b/src/glsl/nir/nir_search.c @@ -166,7 +166,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src, } return true; case nir_type_int: - case nir_type_unsigned: + case nir_type_uint: case nir_type_bool: for (unsigned i = 0; i < num_components; ++i) { if (load->value.i[new_swizzle[i]] != const_val->data.i) @@ -310,7 +310,7 @@ construct_value(const nir_search_value *value, nir_alu_type type, load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i); 
load->value.i[0] = c->data.i; break; - case nir_type_unsigned: + case nir_type_uint: case nir_type_bool: load->value.u[0] = c->data.u; break; diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c index ed374b921fa..06879d64ee2 100644 --- a/src/glsl/nir/nir_validate.c +++ b/src/glsl/nir/nir_validate.c @@ -290,11 +290,11 @@ validate_alu_instr(nir_alu_instr *instr, validate_state *state) { assert(instr->op < nir_num_opcodes); - validate_alu_dest(&instr->dest, state); - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { validate_alu_src(instr, i, state); } + + validate_alu_dest(&instr->dest, state); } static void @@ -375,6 +375,11 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) validate_src(&instr->src[i], state); } + unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; + for (unsigned i = 0; i < num_vars; i++) { + validate_deref_var(instr, instr->variables[i], state); + } + if (nir_intrinsic_infos[instr->intrinsic].has_dest) { unsigned components_written = nir_intrinsic_infos[instr->intrinsic].dest_components; @@ -392,11 +397,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) validate_dest(&instr->dest, state); } - unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; - for (unsigned i = 0; i < num_vars; i++) { - validate_deref_var(instr, instr->variables[i], state); - } - switch (instr->intrinsic) { case nir_intrinsic_load_var: { const struct glsl_type *type = @@ -434,8 +434,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) static void validate_tex_instr(nir_tex_instr *instr, validate_state *state) { - validate_dest(&instr->dest, state); - bool src_type_seen[nir_num_tex_src_types]; for (unsigned i = 0; i < nir_num_tex_src_types; i++) src_type_seen[i] = false; @@ -448,6 +446,8 @@ validate_tex_instr(nir_tex_instr *instr, validate_state *state) if (instr->sampler != NULL) validate_deref_var(instr, instr->sampler, state); + + validate_dest(&instr->dest, state); } static void diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c index 70610ca0f66..86282d25e0a 100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@ -2026,7 +2026,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, switch (glsl_get_sampler_result_type(sampler_type)) { case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; - case GLSL_TYPE_UINT: instr->dest_type = nir_type_unsigned; break; + case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; break; case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break; default: unreachable("Invalid base type for sampler result"); diff --git a/src/glsl/opt_tree_grafting.cpp b/src/glsl/opt_tree_grafting.cpp index e38a0e93058..cd58213c019 100644 --- a/src/glsl/opt_tree_grafting.cpp +++ b/src/glsl/opt_tree_grafting.cpp @@ -275,6 +275,7 @@ ir_tree_grafting_visitor::visit_enter(ir_texture *ir) case ir_lod: case ir_query_levels: case ir_texture_samples: + case ir_samples_identical: break; case ir_txb: if (do_graft(&ir->lod_info.bias))
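Taken together, the nir_clone.c additions give NIR a deep-copy entry point: every variable, register, and SSA def is remapped through the clone_state pointer table, so the copy shares no mutable state with the original. A caller might use it as in the sketch below; mem_ctx and the lowering/compile helpers are hypothetical, and only nir_shader_clone() itself comes from this patch.

   /* Snapshot the shader before destructive lowering so a second
    * variant can later be compiled from the pristine copy.
    */
   nir_shader *pristine = nir_shader_clone(mem_ctx, shader);

   lower_for_variant_a(shader);     /* hypothetical driver pass */
   compile_variant(shader);         /* hypothetical backend entry point */

   /* Variant B starts from the untouched clone. */
   lower_for_variant_b(pristine);   /* hypothetical driver pass */
   compile_variant(pristine);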