Diffstat (limited to 'src/glsl')
-rw-r--r--  src/glsl/Android.gen.mk                   |    3
-rw-r--r--  src/glsl/Makefile.sources                 |    3
-rw-r--r--  src/glsl/ast.h                            |   57
-rw-r--r--  src/glsl/ast_to_hir.cpp                   | 1055
-rw-r--r--  src/glsl/ast_type.cpp                     |  153
-rw-r--r--  src/glsl/builtin_functions.cpp            |   59
-rw-r--r--  src/glsl/builtin_variables.cpp            |   70
-rw-r--r--  src/glsl/glcpp/glcpp-parse.y              |    5
-rw-r--r--  src/glsl/glsl_parser.yy                   |  117
-rw-r--r--  src/glsl/glsl_parser_extras.cpp           |   54
-rw-r--r--  src/glsl/glsl_parser_extras.h             |    7
-rw-r--r--  src/glsl/ir.cpp                           |   11
-rw-r--r--  src/glsl/ir.h                             |    4
-rw-r--r--  src/glsl/ir_clone.cpp                     |    2
-rw-r--r--  src/glsl/ir_equals.cpp                    |   10
-rw-r--r--  src/glsl/ir_hv_accept.cpp                 |    1
-rw-r--r--  src/glsl/ir_print_visitor.cpp             |   10
-rw-r--r--  src/glsl/ir_rvalue_visitor.cpp            |    1
-rw-r--r--  src/glsl/link_varyings.cpp                |   46
-rw-r--r--  src/glsl/linker.cpp                       |   33
-rw-r--r--  src/glsl/nir/glsl_to_nir.cpp              |   41
-rw-r--r--  src/glsl/nir/glsl_types.cpp               |    1
-rw-r--r--  src/glsl/nir/glsl_types.h                 |    5
-rw-r--r--  src/glsl/nir/nir.c                        |   79
-rw-r--r--  src/glsl/nir/nir.h                        |   79
-rw-r--r--  src/glsl/nir/nir_builder.h                |   23
-rw-r--r--  src/glsl/nir/nir_clone.c                  |  674
-rw-r--r--  src/glsl/nir/nir_constant_expressions.py  |    2
-rw-r--r--  src/glsl/nir/nir_intrinsics.h             |    1
-rw-r--r--  src/glsl/nir/nir_lower_clip.c             |    2
-rw-r--r--  src/glsl/nir/nir_lower_idiv.c             |    8
-rw-r--r--  src/glsl/nir/nir_lower_io.c               |    2
-rw-r--r--  src/glsl/nir/nir_lower_tex.c              |   87
-rw-r--r--  src/glsl/nir/nir_lower_two_sided_color.c  |    2
-rw-r--r--  src/glsl/nir/nir_metadata.c               |   36
-rw-r--r--  src/glsl/nir/nir_opcodes.py               |   82
-rw-r--r--  src/glsl/nir/nir_opt_copy_propagate.c     |    7
-rw-r--r--  src/glsl/nir/nir_print.c                  |   14
-rw-r--r--  src/glsl/nir/nir_search.c                 |    4
-rw-r--r--  src/glsl/nir/nir_validate.c               |   18
-rw-r--r--  src/glsl/nir/spirv_to_nir.c               |    2
-rw-r--r--  src/glsl/opt_tree_grafting.cpp            |    1
42 files changed, 2131 insertions(+), 740 deletions(-)
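This series stops treating layout-qualifier values (location, binding, stream, vertices, invocations, local_size_*) as bare integer literals and instead stores them as AST expressions that are folded to constants during AST-to-HIR conversion. A rough sketch of the ARB_enhanced_layouts-style source this groundwork enables (standalone illustrative shader, not code from this patch; names are invented):

    #version 440
    // Any integral constant expression may now appear where a literal was
    // previously required; both qualifiers below fold at compile time.
    const int START = 2;
    layout(location = START + 1) in vec4 color_attr;   // location = 3
    layout(binding = START) uniform sampler2D tex;      // binding = 2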
diff --git a/src/glsl/Android.gen.mk b/src/glsl/Android.gen.mk
index 6898fb0d492..59cc8577a6e 100644
--- a/src/glsl/Android.gen.mk
+++ b/src/glsl/Android.gen.mk
@@ -38,7 +38,8 @@ LOCAL_C_INCLUDES += \
$(MESA_TOP)/src/glsl/nir
LOCAL_EXPORT_C_INCLUDE_DIRS += \
- $(intermediates)/nir
+ $(intermediates)/nir \
+ $(MESA_TOP)/src/glsl/nir
LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \
$(LIBGLCPP_GENERATED_FILES) \
diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
index 957fd6b90ba..0c9fd75d206 100644
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -22,10 +22,12 @@ NIR_FILES = \
nir/glsl_to_nir.h \
nir/glsl_types.cpp \
nir/glsl_types.h \
+ nir/builtin_type_macros.h \
nir/nir.c \
nir/nir.h \
nir/nir_array.h \
nir/nir_builder.h \
+ nir/nir_clone.c \
nir/nir_constant_expressions.h \
nir/nir_control_flow.c \
nir/nir_control_flow.h \
@@ -102,7 +104,6 @@ LIBGLSL_FILES = \
blob.c \
blob.h \
builtin_functions.cpp \
- builtin_type_macros.h \
builtin_types.cpp \
builtin_variables.cpp \
glsl_parser_extras.cpp \
diff --git a/src/glsl/ast.h b/src/glsl/ast.h
index 1b75234d578..3bea63ea0ed 100644
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -336,7 +336,7 @@ public:
array_dimensions.push_tail(&dim->link);
}
- const bool is_single_dimension()
+ bool is_single_dimension() const
{
return this->array_dimensions.tail_pred->prev != NULL &&
this->array_dimensions.tail_pred->prev->is_head_sentinel();
@@ -350,6 +350,26 @@ public:
exec_list array_dimensions;
};
+class ast_layout_expression : public ast_node {
+public:
+ ast_layout_expression(const struct YYLTYPE &locp, ast_expression *expr)
+ {
+ set_location(locp);
+ layout_const_expressions.push_tail(&expr->link);
+ }
+
+ bool process_qualifier_constant(struct _mesa_glsl_parse_state *state,
+ const char *qual_identifier,
+ unsigned *value, bool can_be_zero);
+
+ void merge_qualifier(ast_layout_expression *l_expr)
+ {
+ layout_const_expressions.append_list(&l_expr->layout_const_expressions);
+ }
+
+ exec_list layout_const_expressions;
+};
+
/**
* C-style aggregate initialization class
*
@@ -558,7 +578,7 @@ struct ast_type_qualifier {
unsigned precision:2;
/** Geometry shader invocations for GL_ARB_gpu_shader5. */
- int invocations;
+ ast_layout_expression *invocations;
/**
* Location specified via GL_ARB_explicit_attrib_location layout
@@ -566,20 +586,20 @@ struct ast_type_qualifier {
* \note
* This field is only valid if \c explicit_location is set.
*/
- int location;
+ ast_expression *location;
/**
* Index specified via GL_ARB_explicit_attrib_location layout
*
* \note
* This field is only valid if \c explicit_index is set.
*/
- int index;
+ ast_expression *index;
/** Maximum output vertices in GLSL 1.50 geometry shaders. */
- int max_vertices;
+ ast_layout_expression *max_vertices;
/** Stream in GLSL 1.50 geometry shaders. */
- unsigned stream;
+ ast_expression *stream;
/**
* Input or output primitive type in GLSL 1.50 geometry shaders
@@ -593,7 +613,7 @@ struct ast_type_qualifier {
* \note
* This field is only valid if \c explicit_binding is set.
*/
- int binding;
+ ast_expression *binding;
/**
* Offset specified via GL_ARB_shader_atomic_counter's "offset"
@@ -602,14 +622,14 @@ struct ast_type_qualifier {
* \note
* This field is only valid if \c explicit_offset is set.
*/
- int offset;
+ ast_expression *offset;
/**
* Local size specified via GL_ARB_compute_shader's "local_size_{x,y,z}"
* layout qualifier. Element i of this array is only valid if
* flags.q.local_size & (1 << i) is set.
*/
- int local_size[3];
+ ast_layout_expression *local_size[3];
/** Tessellation evaluation shader: vertex spacing (equal, fractional even/odd) */
GLenum vertex_spacing;
@@ -621,7 +641,7 @@ struct ast_type_qualifier {
bool point_mode;
/** Tessellation control shader: number of output vertices */
- int vertices;
+ ast_layout_expression *vertices;
/**
* Image format specified with an ARB_shader_image_load_store
@@ -752,7 +772,7 @@ public:
class ast_fully_specified_type : public ast_node {
public:
virtual void print(void) const;
- bool has_qualifiers() const;
+ bool has_qualifiers(_mesa_glsl_parse_state *state) const;
ast_fully_specified_type() : qualifier(), specifier(NULL)
{
@@ -1093,17 +1113,13 @@ public:
class ast_tcs_output_layout : public ast_node
{
public:
- ast_tcs_output_layout(const struct YYLTYPE &locp, int vertices)
- : vertices(vertices)
+ ast_tcs_output_layout(const struct YYLTYPE &locp)
{
set_location(locp);
}
virtual ir_rvalue *hir(exec_list *instructions,
struct _mesa_glsl_parse_state *state);
-
-private:
- const int vertices;
};
@@ -1135,9 +1151,12 @@ private:
class ast_cs_input_layout : public ast_node
{
public:
- ast_cs_input_layout(const struct YYLTYPE &locp, const unsigned *local_size)
+ ast_cs_input_layout(const struct YYLTYPE &locp,
+ ast_layout_expression **local_size)
{
- memcpy(this->local_size, local_size, sizeof(this->local_size));
+ for (int i = 0; i < 3; i++) {
+ this->local_size[i] = local_size[i];
+ }
set_location(locp);
}
@@ -1145,7 +1164,7 @@ public:
struct _mesa_glsl_parse_state *state);
private:
- unsigned local_size[3];
+ ast_layout_expression *local_size[3];
};
/*@}*/
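Each occurrence of a qualifier contributes one expression to an ast_layout_expression; merge_qualifier() appends the lists so that a later process_qualifier_constant() call can fold every entry and reject mismatches. A sketch of the resulting behavior (illustrative tessellation control shader, not from this patch):

    #version 400
    layout(vertices = 4) out;    // first declaration: one list entry
    layout(vertices = 4) out;    // merged entry; folds to the same value, OK
    // layout(vertices = 3) out; // would fail the consistency check
    void main() { gl_TessLevelOuter[0] = 1.0; }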
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 65db2618895..52881a4da7a 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2491,7 +2491,7 @@ validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state,
"uniform block layout qualifiers row_major and "
"column_major may not be applied to variables "
"outside of uniform blocks");
- } else if (!type->is_matrix()) {
+ } else if (!type->without_array()->is_matrix()) {
/* The OpenGL ES 3.0 conformance tests did not originally allow
* matrix layout qualifiers on non-matrices. However, the OpenGL
* 4.4 and OpenGL ES 3.0 (revision TBD) specifications were
@@ -2502,39 +2502,88 @@ validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state,
"uniform block layout qualifiers row_major and "
"column_major applied to non-matrix types may "
"be rejected by older compilers");
- } else if (type->is_record()) {
- /* We allow 'layout(row_major)' on structure types because it's the only
- * way to get row-major layouts on matrices contained in structures.
- */
- _mesa_glsl_warning(loc, state,
- "uniform block layout qualifiers row_major and "
- "column_major applied to structure types is not "
- "strictly conformant and may be rejected by other "
- "compilers");
}
}
static bool
-validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
+process_qualifier_constant(struct _mesa_glsl_parse_state *state,
YYLTYPE *loc,
- const glsl_type *type,
- const ast_type_qualifier *qual)
+ const char *qual_identifier,
+ ast_expression *const_expression,
+ unsigned *value)
+{
+ exec_list dummy_instructions;
+
+ if (const_expression == NULL) {
+ *value = 0;
+ return true;
+ }
+
+ ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state);
+
+ ir_constant *const const_int = ir->constant_expression_value();
+ if (const_int == NULL || !const_int->type->is_integer()) {
+ _mesa_glsl_error(loc, state, "%s must be an integral constant "
+ "expression", qual_indentifier);
+ return false;
+ }
+
+ if (const_int->value.i[0] < 0) {
+ _mesa_glsl_error(loc, state, "%s layout qualifier is invalid (%d < 0)",
+ qual_identifier, const_int->value.i[0]);
+ return false;
+ }
+
+ /* If the location is const (and we've verified that
+ * it is) then no instructions should have been emitted
+ * when we converted it to HIR. If they were emitted,
+ * then either the location isn't const after all, or
+ * we are emitting unnecessary instructions.
+ */
+ assert(dummy_instructions.is_empty());
+
+ *value = const_int->value.u[0];
+ return true;
+}
+
+static bool
+validate_stream_qualifier(YYLTYPE *loc, struct _mesa_glsl_parse_state *state,
+ unsigned stream)
+{
+ if (stream >= state->ctx->Const.MaxVertexStreams) {
+ _mesa_glsl_error(loc, state,
+ "invalid stream specified %d is larger than "
+ "MAX_VERTEX_STREAMS - 1 (%d).",
+ stream, state->ctx->Const.MaxVertexStreams - 1);
+ return false;
+ }
+
+ return true;
+}
+
+static void
+apply_explicit_binding(struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc,
+ ir_variable *var,
+ const glsl_type *type,
+ const ast_type_qualifier *qual)
{
if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
_mesa_glsl_error(loc, state,
"the \"binding\" qualifier only applies to uniforms and "
"shader storage buffer objects");
- return false;
+ return;
}
- if (qual->binding < 0) {
- _mesa_glsl_error(loc, state, "binding values must be >= 0");
- return false;
+ unsigned qual_binding;
+ if (!process_qualifier_constant(state, loc, "binding", qual->binding,
+ &qual_binding)) {
+ return;
}
const struct gl_context *const ctx = state->ctx;
unsigned elements = type->is_array() ? type->arrays_of_arrays_size() : 1;
- unsigned max_index = qual->binding + elements - 1;
+ unsigned max_index = qual_binding + elements - 1;
const glsl_type *base_type = type->without_array();
if (base_type->is_interface()) {
@@ -2550,11 +2599,11 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
*/
if (qual->flags.q.uniform &&
max_index >= ctx->Const.MaxUniformBufferBindings) {
- _mesa_glsl_error(loc, state, "layout(binding = %d) for %d UBOs exceeds "
+ _mesa_glsl_error(loc, state, "layout(binding = %u) for %d UBOs exceeds "
"the maximum number of UBO binding points (%d)",
- qual->binding, elements,
+ qual_binding, elements,
ctx->Const.MaxUniformBufferBindings);
- return false;
+ return;
}
/* SSBOs. From page 67 of the GLSL 4.30 specification:
@@ -2568,11 +2617,11 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
*/
if (qual->flags.q.buffer &&
max_index >= ctx->Const.MaxShaderStorageBufferBindings) {
- _mesa_glsl_error(loc, state, "layout(binding = %d) for %d SSBOs exceeds "
+ _mesa_glsl_error(loc, state, "layout(binding = %u) for %d SSBOs exceeds "
"the maximum number of SSBO binding points (%d)",
- qual->binding, elements,
+ qual_binding, elements,
ctx->Const.MaxShaderStorageBufferBindings);
- return false;
+ return;
}
} else if (base_type->is_sampler()) {
/* Samplers. From page 63 of the GLSL 4.20 specification:
@@ -2587,19 +2636,19 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
if (max_index >= limit) {
_mesa_glsl_error(loc, state, "layout(binding = %d) for %d samplers "
"exceeds the maximum number of texture image units "
- "(%d)", qual->binding, elements, limit);
+ "(%u)", qual_binding, elements, limit);
- return false;
+ return;
}
} else if (base_type->contains_atomic()) {
assert(ctx->Const.MaxAtomicBufferBindings <= MAX_COMBINED_ATOMIC_BUFFERS);
- if (unsigned(qual->binding) >= ctx->Const.MaxAtomicBufferBindings) {
+ if (qual_binding >= ctx->Const.MaxAtomicBufferBindings) {
_mesa_glsl_error(loc, state, "layout(binding = %d) exceeds the "
" maximum number of atomic counter buffer bindings"
- "(%d)", qual->binding,
+ "(%u)", qual_binding,
ctx->Const.MaxAtomicBufferBindings);
- return false;
+ return;
}
} else if (state->is_version(420, 310) && base_type->is_image()) {
assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS);
@@ -2607,17 +2656,20 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
_mesa_glsl_error(loc, state, "Image binding %d exceeds the "
" maximum number of image units (%d)", max_index,
ctx->Const.MaxImageUnits);
- return false;
+ return;
}
} else {
_mesa_glsl_error(loc, state,
"the \"binding\" qualifier only applies to uniform "
"blocks, opaque variables, or arrays thereof");
- return false;
+ return;
}
- return true;
+ var->data.explicit_binding = true;
+ var->data.binding = qual_binding;
+
+ return;
}
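apply_explicit_binding() charges one binding point per array element, so an array's last element must still fall below the implementation limit. A minimal sketch of a declaration it validates (illustrative shader, not from this patch):

    #version 420
    // elements = 4, so texture units 3..6 are all consumed; the shader
    // is rejected if unit 6 is at or past the texture image unit limit.
    layout(binding = 3) uniform sampler2D maps[4];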
@@ -2660,20 +2712,26 @@ interpret_interpolation_qualifier(const struct ast_type_qualifier *qual,
static void
-validate_explicit_location(const struct ast_type_qualifier *qual,
- ir_variable *var,
- struct _mesa_glsl_parse_state *state,
- YYLTYPE *loc)
+apply_explicit_location(const struct ast_type_qualifier *qual,
+ ir_variable *var,
+ struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc)
{
bool fail = false;
+ unsigned qual_location;
+ if (!process_qualifier_constant(state, loc, "location", qual->location,
+ &qual_location)) {
+ return;
+ }
+
/* Checks for GL_ARB_explicit_uniform_location. */
if (qual->flags.q.uniform) {
if (!state->check_explicit_uniform_location_allowed(loc, var))
return;
const struct gl_context *const ctx = state->ctx;
- unsigned max_loc = qual->location + var->type->uniform_locations() - 1;
+ unsigned max_loc = qual_location + var->type->uniform_locations() - 1;
if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) {
_mesa_glsl_error(loc, state, "location(s) consumed by uniform %s "
@@ -2683,7 +2741,7 @@ validate_explicit_location(const struct ast_type_qualifier *qual,
}
var->data.explicit_location = true;
- var->data.location = qual->location;
+ var->data.location = qual_location;
return;
}
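The same element counting applies to explicit uniform locations: uniform_locations() worth of consecutive slots are reserved from the folded base. A sketch of a declaration covered by this check (illustrative, not from this patch):

    #version 430
    // Reserves locations 5..8; an error is raised if 8 exceeds the
    // maximum user-assignable uniform location.
    layout(location = 5) uniform float weights[4];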
@@ -2768,30 +2826,40 @@ validate_explicit_location(const struct ast_type_qualifier *qual,
switch (state->stage) {
case MESA_SHADER_VERTEX:
var->data.location = (var->data.mode == ir_var_shader_in)
- ? (qual->location + VERT_ATTRIB_GENERIC0)
- : (qual->location + VARYING_SLOT_VAR0);
+ ? (qual_location + VERT_ATTRIB_GENERIC0)
+ : (qual_location + VARYING_SLOT_VAR0);
break;
case MESA_SHADER_TESS_CTRL:
case MESA_SHADER_TESS_EVAL:
case MESA_SHADER_GEOMETRY:
if (var->data.patch)
- var->data.location = qual->location + VARYING_SLOT_PATCH0;
+ var->data.location = qual_location + VARYING_SLOT_PATCH0;
else
- var->data.location = qual->location + VARYING_SLOT_VAR0;
+ var->data.location = qual_location + VARYING_SLOT_VAR0;
break;
case MESA_SHADER_FRAGMENT:
var->data.location = (var->data.mode == ir_var_shader_out)
- ? (qual->location + FRAG_RESULT_DATA0)
- : (qual->location + VARYING_SLOT_VAR0);
+ ? (qual_location + FRAG_RESULT_DATA0)
+ : (qual_location + VARYING_SLOT_VAR0);
break;
case MESA_SHADER_COMPUTE:
assert(!"Unexpected shader type");
break;
}
- if (qual->flags.q.explicit_index) {
+ /* Check if index was set for the uniform instead of the function */
+ if (qual->flags.q.explicit_index && qual->flags.q.subroutine) {
+ _mesa_glsl_error(loc, state, "an index qualifier can only be "
+ "used with subroutine functions");
+ return;
+ }
+
+ unsigned qual_index;
+ if (qual->flags.q.explicit_index &&
+ process_qualifier_constant(state, loc, "index", qual->index,
+ &qual_index)) {
/* From the GLSL 4.30 specification, section 4.4.2 (Output
* Layout Qualifiers):
*
@@ -2801,12 +2869,12 @@ validate_explicit_location(const struct ast_type_qualifier *qual,
* Older specifications don't mandate a behavior; we take
* this as a clarification and always generate the error.
*/
- if (qual->index < 0 || qual->index > 1) {
+ if (qual_index > 1) {
_mesa_glsl_error(loc, state,
"explicit index may only be 0 or 1");
} else {
var->data.explicit_index = true;
- var->data.index = qual->index;
+ var->data.index = qual_index;
}
}
}
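With the index qualifier now an unsigned folded constant, the old "< 0" half of the check disappears and only index > 1 remains invalid. For reference, the dual-source-blending form being validated (illustrative fragment shader, not from this patch):

    #version 330
    // "index" selects blender color input 0 or 1; any other folded
    // value triggers the error above.
    layout(location = 0, index = 0) out vec4 src0;
    layout(location = 0, index = 1) out vec4 src1;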
@@ -2939,6 +3007,221 @@ validate_array_dimensions(const glsl_type *t,
}
static void
+apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
+ ir_variable *var,
+ struct _mesa_glsl_parse_state *state,
+ YYLTYPE *loc)
+{
+ if (var->name != NULL && strcmp(var->name, "gl_FragCoord") == 0) {
+
+ /* Section 4.3.8.1, page 39 of GLSL 1.50 spec says:
+ *
+ * "Within any shader, the first redeclarations of gl_FragCoord
+ * must appear before any use of gl_FragCoord."
+ *
+ * Generate a compiler error if above condition is not met by the
+ * fragment shader.
+ */
+ ir_variable *earlier = state->symbols->get_variable("gl_FragCoord");
+ if (earlier != NULL &&
+ earlier->data.used &&
+ !state->fs_redeclares_gl_fragcoord) {
+ _mesa_glsl_error(loc, state,
+ "gl_FragCoord used before its first redeclaration "
+ "in fragment shader");
+ }
+
+ /* Make sure all gl_FragCoord redeclarations specify the same layout
+ * qualifiers.
+ */
+ if (is_conflicting_fragcoord_redeclaration(state, qual)) {
+ const char *const qual_string =
+ get_layout_qualifier_string(qual->flags.q.origin_upper_left,
+ qual->flags.q.pixel_center_integer);
+
+ const char *const state_string =
+ get_layout_qualifier_string(state->fs_origin_upper_left,
+ state->fs_pixel_center_integer);
+
+ _mesa_glsl_error(loc, state,
+ "gl_FragCoord redeclared with different layout "
+ "qualifiers (%s) and (%s) ",
+ state_string,
+ qual_string);
+ }
+ state->fs_origin_upper_left = qual->flags.q.origin_upper_left;
+ state->fs_pixel_center_integer = qual->flags.q.pixel_center_integer;
+ state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers =
+ !qual->flags.q.origin_upper_left && !qual->flags.q.pixel_center_integer;
+ state->fs_redeclares_gl_fragcoord =
+ state->fs_origin_upper_left ||
+ state->fs_pixel_center_integer ||
+ state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers;
+ }
+
+ var->data.pixel_center_integer = qual->flags.q.pixel_center_integer;
+ var->data.origin_upper_left = qual->flags.q.origin_upper_left;
+ if ((qual->flags.q.origin_upper_left || qual->flags.q.pixel_center_integer)
+ && (strcmp(var->name, "gl_FragCoord") != 0)) {
+ const char *const qual_string = (qual->flags.q.origin_upper_left)
+ ? "origin_upper_left" : "pixel_center_integer";
+
+ _mesa_glsl_error(loc, state,
+ "layout qualifier `%s' can only be applied to "
+ "fragment shader input `gl_FragCoord'",
+ qual_string);
+ }
+
+ if (qual->flags.q.explicit_location) {
+ apply_explicit_location(qual, var, state, loc);
+ } else if (qual->flags.q.explicit_index) {
+ if (!qual->flags.q.subroutine_def)
+ _mesa_glsl_error(loc, state,
+ "explicit index requires explicit location");
+ }
+
+ if (qual->flags.q.explicit_binding) {
+ apply_explicit_binding(state, loc, var, var->type, qual);
+ }
+
+ if (state->stage == MESA_SHADER_GEOMETRY &&
+ qual->flags.q.out && qual->flags.q.stream) {
+ unsigned qual_stream;
+ if (process_qualifier_constant(state, loc, "stream", qual->stream,
+ &qual_stream) &&
+ validate_stream_qualifier(loc, state, qual_stream)) {
+ var->data.stream = qual_stream;
+ }
+ }
+
+ if (var->type->contains_atomic()) {
+ if (var->data.mode == ir_var_uniform) {
+ if (var->data.explicit_binding) {
+ unsigned *offset =
+ &state->atomic_counter_offsets[var->data.binding];
+
+ if (*offset % ATOMIC_COUNTER_SIZE)
+ _mesa_glsl_error(loc, state,
+ "misaligned atomic counter offset");
+
+ var->data.atomic.offset = *offset;
+ *offset += var->type->atomic_size();
+
+ } else {
+ _mesa_glsl_error(loc, state,
+ "atomic counters require explicit binding point");
+ }
+ } else if (var->data.mode != ir_var_function_in) {
+ _mesa_glsl_error(loc, state, "atomic counters may only be declared as "
+ "function parameters or uniform-qualified "
+ "global variables");
+ }
+ }
+
+ /* Is the 'layout' keyword used with parameters that allow relaxed checking?
+ * Many implementations of GL_ARB_fragment_coord_conventions_enable and some
+ * implementations (only Mesa?) of GL_ARB_explicit_attrib_location_enable
+ * allowed the layout qualifier to be used with 'varying' and 'attribute'.
+ * These extensions and all following extensions that add the 'layout'
+ * keyword have been modified to require the use of 'in' or 'out'.
+ *
+ * The following extensions do not allow the deprecated keywords:
+ *
+ * GL_AMD_conservative_depth
+ * GL_ARB_conservative_depth
+ * GL_ARB_gpu_shader5
+ * GL_ARB_separate_shader_objects
+ * GL_ARB_tessellation_shader
+ * GL_ARB_transform_feedback3
+ * GL_ARB_uniform_buffer_object
+ *
+ * It is unknown whether GL_EXT_shader_image_load_store or GL_NV_gpu_shader5
+ * allow layout with the deprecated keywords.
+ */
+ const bool relaxed_layout_qualifier_checking =
+ state->ARB_fragment_coord_conventions_enable;
+
+ const bool uses_deprecated_qualifier = qual->flags.q.attribute
+ || qual->flags.q.varying;
+ if (qual->has_layout() && uses_deprecated_qualifier) {
+ if (relaxed_layout_qualifier_checking) {
+ _mesa_glsl_warning(loc, state,
+ "`layout' qualifier may not be used with "
+ "`attribute' or `varying'");
+ } else {
+ _mesa_glsl_error(loc, state,
+ "`layout' qualifier may not be used with "
+ "`attribute' or `varying'");
+ }
+ }
+
+ /* Layout qualifiers for gl_FragDepth, which are enabled by extension
+ * AMD_conservative_depth.
+ */
+ int depth_layout_count = qual->flags.q.depth_any
+ + qual->flags.q.depth_greater
+ + qual->flags.q.depth_less
+ + qual->flags.q.depth_unchanged;
+ if (depth_layout_count > 0
+ && !state->AMD_conservative_depth_enable
+ && !state->ARB_conservative_depth_enable) {
+ _mesa_glsl_error(loc, state,
+ "extension GL_AMD_conservative_depth or "
+ "GL_ARB_conservative_depth must be enabled "
+ "to use depth layout qualifiers");
+ } else if (depth_layout_count > 0
+ && strcmp(var->name, "gl_FragDepth") != 0) {
+ _mesa_glsl_error(loc, state,
+ "depth layout qualifiers can be applied only to "
+ "gl_FragDepth");
+ } else if (depth_layout_count > 1
+ && strcmp(var->name, "gl_FragDepth") == 0) {
+ _mesa_glsl_error(loc, state,
+ "at most one depth layout qualifier can be applied to "
+ "gl_FragDepth");
+ }
+ if (qual->flags.q.depth_any)
+ var->data.depth_layout = ir_depth_layout_any;
+ else if (qual->flags.q.depth_greater)
+ var->data.depth_layout = ir_depth_layout_greater;
+ else if (qual->flags.q.depth_less)
+ var->data.depth_layout = ir_depth_layout_less;
+ else if (qual->flags.q.depth_unchanged)
+ var->data.depth_layout = ir_depth_layout_unchanged;
+ else
+ var->data.depth_layout = ir_depth_layout_none;
+
+ if (qual->flags.q.std140 ||
+ qual->flags.q.std430 ||
+ qual->flags.q.packed ||
+ qual->flags.q.shared) {
+ _mesa_glsl_error(loc, state,
+ "uniform and shader storage block layout qualifiers "
+ "std140, std430, packed, and shared can only be "
+ "applied to uniform or shader storage blocks, not "
+ "members");
+ }
+
+ if (qual->flags.q.row_major || qual->flags.q.column_major) {
+ validate_matrix_layout_for_type(state, loc, var->type, var);
+ }
+
+ /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader
+ * Inputs):
+ *
+ * "Fragment shaders also allow the following layout qualifier on in only
+ * (not with variable declarations)
+ * layout-qualifier-id
+ * early_fragment_tests
+ * [...]"
+ */
+ if (qual->flags.q.early_fragment_tests) {
+ _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only "
+ "valid in fragment shader input layout declaration.");
+ }
+}
+
+static void
apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
ir_variable *var,
struct _mesa_glsl_parse_state *state,
@@ -2992,11 +3275,6 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
select_gles_precision(qual->precision, var->type, state, loc);
}
- if (state->stage == MESA_SHADER_GEOMETRY &&
- qual->flags.q.out && qual->flags.q.stream) {
- var->data.stream = qual->stream;
- }
-
if (qual->flags.q.patch)
var->data.patch = 1;
@@ -3136,102 +3414,6 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
interpret_interpolation_qualifier(qual, (ir_variable_mode) var->data.mode,
state, loc);
- var->data.pixel_center_integer = qual->flags.q.pixel_center_integer;
- var->data.origin_upper_left = qual->flags.q.origin_upper_left;
- if ((qual->flags.q.origin_upper_left || qual->flags.q.pixel_center_integer)
- && (strcmp(var->name, "gl_FragCoord") != 0)) {
- const char *const qual_string = (qual->flags.q.origin_upper_left)
- ? "origin_upper_left" : "pixel_center_integer";
-
- _mesa_glsl_error(loc, state,
- "layout qualifier `%s' can only be applied to "
- "fragment shader input `gl_FragCoord'",
- qual_string);
- }
-
- if (var->name != NULL && strcmp(var->name, "gl_FragCoord") == 0) {
-
- /* Section 4.3.8.1, page 39 of GLSL 1.50 spec says:
- *
- * "Within any shader, the first redeclarations of gl_FragCoord
- * must appear before any use of gl_FragCoord."
- *
- * Generate a compiler error if above condition is not met by the
- * fragment shader.
- */
- ir_variable *earlier = state->symbols->get_variable("gl_FragCoord");
- if (earlier != NULL &&
- earlier->data.used &&
- !state->fs_redeclares_gl_fragcoord) {
- _mesa_glsl_error(loc, state,
- "gl_FragCoord used before its first redeclaration "
- "in fragment shader");
- }
-
- /* Make sure all gl_FragCoord redeclarations specify the same layout
- * qualifiers.
- */
- if (is_conflicting_fragcoord_redeclaration(state, qual)) {
- const char *const qual_string =
- get_layout_qualifier_string(qual->flags.q.origin_upper_left,
- qual->flags.q.pixel_center_integer);
-
- const char *const state_string =
- get_layout_qualifier_string(state->fs_origin_upper_left,
- state->fs_pixel_center_integer);
-
- _mesa_glsl_error(loc, state,
- "gl_FragCoord redeclared with different layout "
- "qualifiers (%s) and (%s) ",
- state_string,
- qual_string);
- }
- state->fs_origin_upper_left = qual->flags.q.origin_upper_left;
- state->fs_pixel_center_integer = qual->flags.q.pixel_center_integer;
- state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers =
- !qual->flags.q.origin_upper_left && !qual->flags.q.pixel_center_integer;
- state->fs_redeclares_gl_fragcoord =
- state->fs_origin_upper_left ||
- state->fs_pixel_center_integer ||
- state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers;
- }
-
- if (qual->flags.q.explicit_location) {
- validate_explicit_location(qual, var, state, loc);
- } else if (qual->flags.q.explicit_index) {
- _mesa_glsl_error(loc, state, "explicit index requires explicit location");
- }
-
- if (qual->flags.q.explicit_binding &&
- validate_binding_qualifier(state, loc, var->type, qual)) {
- var->data.explicit_binding = true;
- var->data.binding = qual->binding;
- }
-
- if (var->type->contains_atomic()) {
- if (var->data.mode == ir_var_uniform) {
- if (var->data.explicit_binding) {
- unsigned *offset =
- &state->atomic_counter_offsets[var->data.binding];
-
- if (*offset % ATOMIC_COUNTER_SIZE)
- _mesa_glsl_error(loc, state,
- "misaligned atomic counter offset");
-
- var->data.atomic.offset = *offset;
- *offset += var->type->atomic_size();
-
- } else {
- _mesa_glsl_error(loc, state,
- "atomic counters require explicit binding point");
- }
- } else if (var->data.mode != ir_var_function_in) {
- _mesa_glsl_error(loc, state, "atomic counters may only be declared as "
- "function parameters or uniform-qualified "
- "global variables");
- }
- }
-
/* Does the declaration use the deprecated 'attribute' or 'varying'
* keywords?
*/
@@ -3267,114 +3449,13 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
"`out' or `varying' variables between shader stages");
}
-
- /* Is the 'layout' keyword used with parameters that allow relaxed checking.
- * Many implementations of GL_ARB_fragment_coord_conventions_enable and some
- * implementations (only Mesa?) GL_ARB_explicit_attrib_location_enable
- * allowed the layout qualifier to be used with 'varying' and 'attribute'.
- * These extensions and all following extensions that add the 'layout'
- * keyword have been modified to require the use of 'in' or 'out'.
- *
- * The following extension do not allow the deprecated keywords:
- *
- * GL_AMD_conservative_depth
- * GL_ARB_conservative_depth
- * GL_ARB_gpu_shader5
- * GL_ARB_separate_shader_objects
- * GL_ARB_tessellation_shader
- * GL_ARB_transform_feedback3
- * GL_ARB_uniform_buffer_object
- *
- * It is unknown whether GL_EXT_shader_image_load_store or GL_NV_gpu_shader5
- * allow layout with the deprecated keywords.
- */
- const bool relaxed_layout_qualifier_checking =
- state->ARB_fragment_coord_conventions_enable;
-
- if (qual->has_layout() && uses_deprecated_qualifier) {
- if (relaxed_layout_qualifier_checking) {
- _mesa_glsl_warning(loc, state,
- "`layout' qualifier may not be used with "
- "`attribute' or `varying'");
- } else {
- _mesa_glsl_error(loc, state,
- "`layout' qualifier may not be used with "
- "`attribute' or `varying'");
- }
- }
-
- /* Layout qualifiers for gl_FragDepth, which are enabled by extension
- * AMD_conservative_depth.
- */
- int depth_layout_count = qual->flags.q.depth_any
- + qual->flags.q.depth_greater
- + qual->flags.q.depth_less
- + qual->flags.q.depth_unchanged;
- if (depth_layout_count > 0
- && !state->AMD_conservative_depth_enable
- && !state->ARB_conservative_depth_enable) {
- _mesa_glsl_error(loc, state,
- "extension GL_AMD_conservative_depth or "
- "GL_ARB_conservative_depth must be enabled "
- "to use depth layout qualifiers");
- } else if (depth_layout_count > 0
- && strcmp(var->name, "gl_FragDepth") != 0) {
- _mesa_glsl_error(loc, state,
- "depth layout qualifiers can be applied only to "
- "gl_FragDepth");
- } else if (depth_layout_count > 1
- && strcmp(var->name, "gl_FragDepth") == 0) {
- _mesa_glsl_error(loc, state,
- "at most one depth layout qualifier can be applied to "
- "gl_FragDepth");
- }
- if (qual->flags.q.depth_any)
- var->data.depth_layout = ir_depth_layout_any;
- else if (qual->flags.q.depth_greater)
- var->data.depth_layout = ir_depth_layout_greater;
- else if (qual->flags.q.depth_less)
- var->data.depth_layout = ir_depth_layout_less;
- else if (qual->flags.q.depth_unchanged)
- var->data.depth_layout = ir_depth_layout_unchanged;
- else
- var->data.depth_layout = ir_depth_layout_none;
-
- if (qual->flags.q.std140 ||
- qual->flags.q.std430 ||
- qual->flags.q.packed ||
- qual->flags.q.shared) {
- _mesa_glsl_error(loc, state,
- "uniform and shader storage block layout qualifiers "
- "std140, std430, packed, and shared can only be "
- "applied to uniform or shader storage blocks, not "
- "members");
- }
-
if (qual->flags.q.shared_storage && state->stage != MESA_SHADER_COMPUTE) {
_mesa_glsl_error(loc, state,
"the shared storage qualifiers can only be used with "
"compute shaders");
}
- if (qual->flags.q.row_major || qual->flags.q.column_major) {
- validate_matrix_layout_for_type(state, loc, var->type, var);
- }
-
apply_image_qualifier_to_variable(qual, var, state, loc);
-
- /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader
- * Inputs):
- *
- * "Fragment shaders also allow the following layout qualifier on in only
- * (not with variable declarations)
- * layout-qualifier-id
- * early_fragment_tests
- * [...]"
- */
- if (qual->flags.q.early_fragment_tests) {
- _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only "
- "valid in fragment shader input layout declaration.");
- }
}
/**
@@ -3798,7 +3879,17 @@ handle_tess_ctrl_shader_output_decl(struct _mesa_glsl_parse_state *state,
unsigned num_vertices = 0;
if (state->tcs_output_vertices_specified) {
- num_vertices = state->out_qualifier->vertices;
+ if (!state->out_qualifier->vertices->
+ process_qualifier_constant(state, "vertices",
+ &num_vertices, false)) {
+ return;
+ }
+
+ if (num_vertices > state->Const.MaxPatchVertices) {
+ _mesa_glsl_error(&loc, state, "vertices (%d) exceeds "
+ "GL_MAX_PATCH_VERTICES", num_vertices);
+ return;
+ }
}
if (!var->type->is_array() && !var->data.patch) {
@@ -4032,9 +4123,18 @@ ast_declarator_list::hir(exec_list *instructions,
*/
if (decl_type && decl_type->contains_atomic()) {
if (type->qualifier.flags.q.explicit_binding &&
- type->qualifier.flags.q.explicit_offset)
- state->atomic_counter_offsets[type->qualifier.binding] =
- type->qualifier.offset;
+ type->qualifier.flags.q.explicit_offset) {
+ unsigned qual_binding;
+ unsigned qual_offset;
+ if (process_qualifier_constant(state, &loc, "binding",
+ type->qualifier.binding,
+ &qual_binding)
+ && process_qualifier_constant(state, &loc, "offset",
+ type->qualifier.offset,
+ &qual_offset)) {
+ state->atomic_counter_offsets[qual_binding] = qual_offset;
+ }
+ }
}
if (this->declarations.is_empty()) {
@@ -4188,6 +4288,8 @@ ast_declarator_list::hir(exec_list *instructions,
apply_type_qualifier_to_variable(& this->type->qualifier, var, state,
& loc, false);
+ apply_layout_qualifier_to_variable(&this->type->qualifier, var, state,
+ &loc);
if (this->type->qualifier.flags.q.invariant) {
if (!is_varying_var(var, state->stage)) {
@@ -4983,7 +5085,7 @@ ast_function::hir(exec_list *instructions,
/* From page 56 (page 62 of the PDF) of the GLSL 1.30 spec:
* "No qualifier is allowed on the return type of a function."
*/
- if (this->return_type->has_qualifiers()) {
+ if (this->return_type->has_qualifiers(state)) {
YYLTYPE loc = this->get_location();
_mesa_glsl_error(& loc, state,
"function `%s' return type has qualifiers", name);
@@ -5115,6 +5217,27 @@ ast_function::hir(exec_list *instructions,
if (this->return_type->qualifier.flags.q.subroutine_def) {
int idx;
+ if (this->return_type->qualifier.flags.q.explicit_index) {
+ unsigned qual_index;
+ if (process_qualifier_constant(state, &loc, "index",
+ this->return_type->qualifier.index,
+ &qual_index)) {
+ if (!state->has_explicit_uniform_location()) {
+ _mesa_glsl_error(&loc, state, "subroutine index requires "
+ "GL_ARB_explicit_uniform_location or "
+ "GLSL 4.30");
+ } else if (qual_index >= MAX_SUBROUTINES) {
+ _mesa_glsl_error(&loc, state,
+ "invalid subroutine index (%d) index must "
+ "be a number between 0 and "
+ "GL_MAX_SUBROUTINES - 1 (%d)", qual_index,
+ MAX_SUBROUTINES - 1);
+ } else {
+ f->subroutine_index = qual_index;
+ }
+ }
+ }
+
f->num_subroutine_types = this->return_type->qualifier.subroutine_list->declarations.length();
f->subroutine_types = ralloc_array(state, const struct glsl_type *,
f->num_subroutine_types);
@@ -6046,27 +6169,19 @@ ast_type_specifier::hir(exec_list *instructions,
* stored in \c *fields_ret.
*/
unsigned
-ast_process_structure_or_interface_block(exec_list *instructions,
- struct _mesa_glsl_parse_state *state,
- exec_list *declarations,
- YYLTYPE &loc,
- glsl_struct_field **fields_ret,
- bool is_interface,
- enum glsl_matrix_layout matrix_layout,
- bool allow_reserved_names,
- ir_variable_mode var_mode,
- ast_type_qualifier *layout)
+ast_process_struct_or_iface_block_members(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state,
+ exec_list *declarations,
+ glsl_struct_field **fields_ret,
+ bool is_interface,
+ enum glsl_matrix_layout matrix_layout,
+ bool allow_reserved_names,
+ ir_variable_mode var_mode,
+ ast_type_qualifier *layout,
+ unsigned block_stream)
{
unsigned decl_count = 0;
- /* For blocks that accept memory qualifiers (i.e. shader storage), verify
- * that we don't have incompatible qualifiers
- */
- if (layout && layout->flags.q.read_only && layout->flags.q.write_only) {
- _mesa_glsl_error(&loc, state,
- "Interface block sets both readonly and writeonly");
- }
-
/* Make an initial pass over the list of fields to determine how
* many there are. Each element in this list is an ast_declarator_list.
* This means that we actually need to count the number of elements in the
@@ -6087,6 +6202,7 @@ ast_process_structure_or_interface_block(exec_list *instructions,
unsigned i = 0;
foreach_list_typed (ast_declarator_list, decl_list, link, declarations) {
const char *type_name;
+ YYLTYPE loc = decl_list->get_location();
decl_list->type->specifier->hir(instructions, state);
@@ -6101,74 +6217,120 @@ ast_process_structure_or_interface_block(exec_list *instructions,
const glsl_type *decl_type =
decl_list->type->glsl_type(& type_name, state);
- foreach_list_typed (ast_declaration, decl, link,
- &decl_list->declarations) {
- if (!allow_reserved_names)
- validate_identifier(decl->identifier, loc, state);
+ const struct ast_type_qualifier *const qual =
+ &decl_list->type->qualifier;
- /* From section 4.3.9 of the GLSL 4.40 spec:
- *
- * "[In interface blocks] opaque types are not allowed."
+ /* From section 4.3.9 of the GLSL 4.40 spec:
+ *
+ * "[In interface blocks] opaque types are not allowed."
+ *
+ * It should be impossible for decl_type to be NULL here. Cases that
+ * might naturally lead to decl_type being NULL, especially for the
+ * is_interface case, will have resulted in compilation having
+ * already halted due to a syntax error.
+ */
+ assert(decl_type);
+
+ if (is_interface && decl_type->contains_opaque()) {
+ _mesa_glsl_error(&loc, state,
+ "uniform/buffer in non-default interface block contains "
+ "opaque variable");
+ }
+
+ if (decl_type->contains_atomic()) {
+ /* From section 4.1.7.3 of the GLSL 4.40 spec:
*
- * It should be impossible for decl_type to be NULL here. Cases that
- * might naturally lead to decl_type being NULL, especially for the
- * is_interface case, will have resulted in compilation having
- * already halted due to a syntax error.
+ * "Members of structures cannot be declared as atomic counter
+ * types."
*/
- assert(decl_type);
+ _mesa_glsl_error(&loc, state, "atomic counter in structure, "
+ "shader storage block or uniform block");
+ }
- if (is_interface && decl_type->contains_opaque()) {
- YYLTYPE loc = decl_list->get_location();
- _mesa_glsl_error(&loc, state,
- "uniform/buffer in non-default interface block contains "
- "opaque variable");
- }
+ if (decl_type->contains_image()) {
+ /* FINISHME: Same problem as with atomic counters.
+ * FINISHME: Request clarification from Khronos and add
+ * FINISHME: spec quotation here.
+ */
+ _mesa_glsl_error(&loc, state,
+ "image in structure, shader storage block or "
+ "uniform block");
+ }
- if (decl_type->contains_atomic()) {
- /* From section 4.1.7.3 of the GLSL 4.40 spec:
- *
- * "Members of structures cannot be declared as atomic counter
- * types."
- */
- YYLTYPE loc = decl_list->get_location();
- _mesa_glsl_error(&loc, state, "atomic counter in structure, "
- "shader storage block or uniform block");
- }
+ if (qual->flags.q.explicit_binding) {
+ _mesa_glsl_error(&loc, state,
+ "binding layout qualifier cannot be applied "
+ "to struct or interface block members");
+ }
- if (decl_type->contains_image()) {
- /* FINISHME: Same problem as with atomic counters.
- * FINISHME: Request clarification from Khronos and add
- * FINISHME: spec quotation here.
- */
- YYLTYPE loc = decl_list->get_location();
- _mesa_glsl_error(&loc, state,
- "image in structure, shader storage block or "
- "uniform block");
+ if (qual->flags.q.std140 ||
+ qual->flags.q.std430 ||
+ qual->flags.q.packed ||
+ qual->flags.q.shared) {
+ _mesa_glsl_error(&loc, state,
+ "uniform/shader storage block layout qualifiers "
+ "std140, std430, packed, and shared can only be "
+ "applied to uniform/shader storage blocks, not "
+ "members");
+ }
+
+ if (qual->flags.q.constant) {
+ _mesa_glsl_error(&loc, state,
+ "const storage qualifier cannot be applied "
+ "to struct or interface block members");
+ }
+
+ /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec:
+ *
+ * "A block member may be declared with a stream identifier, but
+ * the specified stream must match the stream associated with the
+ * containing block."
+ */
+ if (qual->flags.q.explicit_stream) {
+ unsigned qual_stream;
+ if (process_qualifier_constant(state, &loc, "stream",
+ qual->stream, &qual_stream) &&
+ qual_stream != block_stream) {
+ _mesa_glsl_error(&loc, state, "stream layout qualifier on "
+ "interface block member does not match "
+ "the interface block (%d vs %d)", qual->stream,
+ block_stream);
}
+ }
- const struct ast_type_qualifier *const qual =
- & decl_list->type->qualifier;
+ if (qual->flags.q.uniform && qual->has_interpolation()) {
+ _mesa_glsl_error(&loc, state,
+ "interpolation qualifiers cannot be used "
+ "with uniform interface blocks");
+ }
- if (qual->flags.q.explicit_binding)
- validate_binding_qualifier(state, &loc, decl_type, qual);
+ if ((qual->flags.q.uniform || !is_interface) &&
+ qual->has_auxiliary_storage()) {
+ _mesa_glsl_error(&loc, state,
+ "auxiliary storage qualifiers cannot be used "
+ "in uniform blocks or structures.");
+ }
- if (qual->flags.q.std140 ||
- qual->flags.q.std430 ||
- qual->flags.q.packed ||
- qual->flags.q.shared) {
+ if (qual->flags.q.row_major || qual->flags.q.column_major) {
+ if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
_mesa_glsl_error(&loc, state,
- "uniform/shader storage block layout qualifiers "
- "std140, std430, packed, and shared can only be "
- "applied to uniform/shader storage blocks, not "
- "members");
- }
+ "row_major and column_major can only be "
+ "applied to interface blocks");
+ } else
+ validate_matrix_layout_for_type(state, &loc, decl_type, NULL);
+ }
- if (qual->flags.q.constant) {
- YYLTYPE loc = decl_list->get_location();
- _mesa_glsl_error(&loc, state,
- "const storage qualifier cannot be applied "
- "to struct or interface block members");
- }
+ if (qual->flags.q.read_only && qual->flags.q.write_only) {
+ _mesa_glsl_error(&loc, state, "buffer variable can't be both "
+ "readonly and writeonly.");
+ }
+
+ foreach_list_typed (ast_declaration, decl, link,
+ &decl_list->declarations) {
+ YYLTYPE loc = decl->get_location();
+
+ if (!allow_reserved_names)
+ validate_identifier(decl->identifier, loc, state);
const struct glsl_type *field_type =
process_array_type(&loc, decl_type, decl->array_specifier, state);
@@ -6183,42 +6345,6 @@ ast_process_structure_or_interface_block(exec_list *instructions,
fields[i].patch = qual->flags.q.patch ? 1 : 0;
fields[i].precision = qual->precision;
- /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec:
- *
- * "A block member may be declared with a stream identifier, but
- * the specified stream must match the stream associated with the
- * containing block."
- */
- if (qual->flags.q.explicit_stream &&
- qual->stream != layout->stream) {
- _mesa_glsl_error(&loc, state, "stream layout qualifier on "
- "interface block member `%s' does not match "
- "the interface block (%d vs %d)",
- fields[i].name, qual->stream, layout->stream);
- }
-
- if (qual->flags.q.row_major || qual->flags.q.column_major) {
- if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
- _mesa_glsl_error(&loc, state,
- "row_major and column_major can only be "
- "applied to interface blocks");
- } else
- validate_matrix_layout_for_type(state, &loc, field_type, NULL);
- }
-
- if (qual->flags.q.uniform && qual->has_interpolation()) {
- _mesa_glsl_error(&loc, state,
- "interpolation qualifiers cannot be used "
- "with uniform interface blocks");
- }
-
- if ((qual->flags.q.uniform || !is_interface) &&
- qual->has_auxiliary_storage()) {
- _mesa_glsl_error(&loc, state,
- "auxiliary storage qualifiers cannot be used "
- "in uniform blocks or structures.");
- }
-
/* Propagate row- / column-major information down the fields of the
* structure or interface block. Structures need this data because
* the structure may contain a structure that contains ... a matrix
@@ -6248,29 +6374,20 @@ ast_process_structure_or_interface_block(exec_list *instructions,
* be defined inside shader storage buffer objects
*/
if (layout && var_mode == ir_var_shader_storage) {
- if (qual->flags.q.read_only && qual->flags.q.write_only) {
- _mesa_glsl_error(&loc, state,
- "buffer variable `%s' can't be "
- "readonly and writeonly.", fields[i].name);
- }
-
/* For readonly and writeonly qualifiers the field definition,
* if set, overwrites the layout qualifier.
*/
- bool read_only = layout->flags.q.read_only;
- bool write_only = layout->flags.q.write_only;
-
if (qual->flags.q.read_only) {
- read_only = true;
- write_only = false;
+ fields[i].image_read_only = true;
+ fields[i].image_write_only = false;
} else if (qual->flags.q.write_only) {
- read_only = false;
- write_only = true;
+ fields[i].image_read_only = false;
+ fields[i].image_write_only = true;
+ } else {
+ fields[i].image_read_only = layout->flags.q.read_only;
+ fields[i].image_write_only = layout->flags.q.write_only;
}
- fields[i].image_read_only = read_only;
- fields[i].image_write_only = write_only;
-
/* For other qualifiers, we set the flag if either the layout
* qualifier or the field qualifier are set
*/
@@ -6328,16 +6445,16 @@ ast_struct_specifier::hir(exec_list *instructions,
glsl_struct_field *fields;
unsigned decl_count =
- ast_process_structure_or_interface_block(instructions,
- state,
- &this->declarations,
- loc,
- &fields,
- false,
- GLSL_MATRIX_LAYOUT_INHERITED,
- false /* allow_reserved_names */,
- ir_var_auto,
- NULL);
+ ast_process_struct_or_iface_block_members(instructions,
+ state,
+ &this->declarations,
+ &fields,
+ false,
+ GLSL_MATRIX_LAYOUT_INHERITED,
+ false /* allow_reserved_names */,
+ ir_var_auto,
+ NULL,
+ 0 /* for interface only */);
validate_identifier(this->name, loc, state);
@@ -6483,17 +6600,36 @@ ast_interface_block::hir(exec_list *instructions,
*/
state->struct_specifier_depth++;
+ /* For blocks that accept memory qualifiers (i.e. shader storage), verify
+ * that we don't have incompatible qualifiers
+ */
+ if (this->layout.flags.q.read_only && this->layout.flags.q.write_only) {
+ _mesa_glsl_error(&loc, state,
+ "Interface block sets both readonly and writeonly");
+ }
+
+ unsigned qual_stream;
+ if (!process_qualifier_constant(state, &loc, "stream", this->layout.stream,
+ &qual_stream) ||
+ !validate_stream_qualifier(&loc, state, qual_stream)) {
+ /* If the stream qualifier is invalid it doesn't make sense to continue
+ * on and try to compare stream layouts on member variables against it
+ * so just return early.
+ */
+ return NULL;
+ }
+
unsigned int num_variables =
- ast_process_structure_or_interface_block(&declared_variables,
- state,
- &this->declarations,
- loc,
- &fields,
- true,
- matrix_layout,
- redeclaring_per_vertex,
- var_mode,
- &this->layout);
+ ast_process_struct_or_iface_block_members(&declared_variables,
+ state,
+ &this->declarations,
+ &fields,
+ true,
+ matrix_layout,
+ redeclaring_per_vertex,
+ var_mode,
+ &this->layout,
+ qual_stream);
state->struct_specifier_depth--;
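Member streams are now folded and compared against the block's folded stream. A sketch of the section 4.4.2.3 rule quoted above (illustrative geometry shader, not from this patch):

    #version 400
    layout(points) in;
    layout(points, max_vertices = 1) out;
    layout(stream = 1) out Block {
       vec4 a;                        // inherits the block's stream 1
       layout(stream = 1) vec4 b;     // OK: matches the containing block
       // layout(stream = 2) vec4 c;  // error: does not match (2 vs 1)
    } blk;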
@@ -6604,6 +6740,8 @@ ast_interface_block::hir(exec_list *instructions,
earlier_per_vertex->fields.structure[j].sample;
fields[i].patch =
earlier_per_vertex->fields.structure[j].patch;
+ fields[i].precision =
+ earlier_per_vertex->fields.structure[j].precision;
}
}
@@ -6633,8 +6771,6 @@ ast_interface_block::hir(exec_list *instructions,
num_variables,
packing,
this->block_name);
- if (this->layout.flags.q.explicit_binding)
- validate_binding_qualifier(state, &loc, block_type, &this->layout);
if (!state->symbols->add_interface(block_type->name, block_type, var_mode)) {
YYLTYPE loc = this->get_location();
@@ -6765,10 +6901,6 @@ ast_interface_block::hir(exec_list *instructions,
"not allowed");
}
- if (this->layout.flags.q.explicit_binding)
- validate_binding_qualifier(state, &loc, block_array_type,
- &this->layout);
-
var = new(state) ir_variable(block_array_type,
this->instance_name,
var_mode);
@@ -6830,14 +6962,12 @@ ast_interface_block::hir(exec_list *instructions,
earlier->reinit_interface_type(block_type);
delete var;
} else {
- /* Propagate the "binding" keyword into this UBO's fields;
- * the UBO declaration itself doesn't get an ir_variable unless it
- * has an instance name. This is ugly.
- */
- var->data.explicit_binding = this->layout.flags.q.explicit_binding;
- var->data.binding = this->layout.binding;
+ if (this->layout.flags.q.explicit_binding) {
+ apply_explicit_binding(state, &loc, var, var->type,
+ &this->layout);
+ }
- var->data.stream = this->layout.stream;
+ var->data.stream = qual_stream;
state->symbols->add_variable(var);
instructions->push_tail(var);
@@ -6857,7 +6987,7 @@ ast_interface_block::hir(exec_list *instructions,
var->data.centroid = fields[i].centroid;
var->data.sample = fields[i].sample;
var->data.patch = fields[i].patch;
- var->data.stream = this->layout.stream;
+ var->data.stream = qual_stream;
var->init_interface_type(block_type);
if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform)
@@ -6914,8 +7044,10 @@ ast_interface_block::hir(exec_list *instructions,
* The UBO declaration itself doesn't get an ir_variable unless it
* has an instance name. This is ugly.
*/
- var->data.explicit_binding = this->layout.flags.q.explicit_binding;
- var->data.binding = this->layout.binding;
+ if (this->layout.flags.q.explicit_binding) {
+ apply_explicit_binding(state, &loc, var,
+ var->get_interface_type(), &this->layout);
+ }
if (var->type->is_unsized_array()) {
if (var->is_in_shader_storage_block()) {
@@ -6997,22 +7129,18 @@ ast_tcs_output_layout::hir(exec_list *instructions,
{
YYLTYPE loc = this->get_location();
- /* If any tessellation control output layout declaration preceded this
- * one, make sure it was consistent with this one.
- */
- if (state->tcs_output_vertices_specified &&
- state->out_qualifier->vertices != this->vertices) {
- _mesa_glsl_error(&loc, state,
- "tessellation control shader output layout does not "
- "match previous declaration");
- return NULL;
+ unsigned num_vertices;
+ if (!state->out_qualifier->vertices->
+ process_qualifier_constant(state, "vertices", &num_vertices,
+ false)) {
+ /* return here to stop cascading incorrect error messages */
+ return NULL;
}
/* If any shader outputs occurred before this declaration and specified an
* array size, make sure the size they specified is consistent with the
* primitive type.
*/
- unsigned num_vertices = this->vertices;
if (state->tcs_output_size != 0 && state->tcs_output_size != num_vertices) {
_mesa_glsl_error(&loc, state,
"this tessellation control shader output layout "
@@ -7120,20 +7248,6 @@ ast_cs_input_layout::hir(exec_list *instructions,
{
YYLTYPE loc = this->get_location();
- /* If any compute input layout declaration preceded this one, make sure it
- * was consistent with this one.
- */
- if (state->cs_input_local_size_specified) {
- for (int i = 0; i < 3; i++) {
- if (state->cs_input_local_size[i] != this->local_size[i]) {
- _mesa_glsl_error(&loc, state,
- "compute shader input layout does not match"
- " previous declaration");
- return NULL;
- }
- }
- }
-
/* From the ARB_compute_shader specification:
*
* If the local size of the shader in any dimension is greater
@@ -7146,15 +7260,30 @@ ast_cs_input_layout::hir(exec_list *instructions,
* report it at compile time as well.
*/
GLuint64 total_invocations = 1;
+ unsigned qual_local_size[3];
for (int i = 0; i < 3; i++) {
- if (this->local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) {
+
+ char *local_size_str = ralloc_asprintf(NULL, "invalid local_size_%c",
+ 'x' + i);
+ /* Infer a local_size of 1 for unspecified dimensions */
+ if (this->local_size[i] == NULL) {
+ qual_local_size[i] = 1;
+ } else if (!this->local_size[i]->
+ process_qualifier_constant(state, local_size_str,
+ &qual_local_size[i], false)) {
+ ralloc_free(local_size_str);
+ return NULL;
+ }
+ ralloc_free(local_size_str);
+
+ if (qual_local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) {
_mesa_glsl_error(&loc, state,
"local_size_%c exceeds MAX_COMPUTE_WORK_GROUP_SIZE"
" (%d)", 'x' + i,
state->ctx->Const.MaxComputeWorkGroupSize[i]);
break;
}
- total_invocations *= this->local_size[i];
+ total_invocations *= qual_local_size[i];
if (total_invocations >
state->ctx->Const.MaxComputeWorkGroupInvocations) {
_mesa_glsl_error(&loc, state,
@@ -7165,9 +7294,23 @@ ast_cs_input_layout::hir(exec_list *instructions,
}
}
+ /* If any compute input layout declaration preceded this one, make sure it
+ * was consistent with this one.
+ */
+ if (state->cs_input_local_size_specified) {
+ for (int i = 0; i < 3; i++) {
+ if (state->cs_input_local_size[i] != qual_local_size[i]) {
+ _mesa_glsl_error(&loc, state,
+ "compute shader input layout does not match"
+ " previous declaration");
+ return NULL;
+ }
+ }
+ }
+
state->cs_input_local_size_specified = true;
for (int i = 0; i < 3; i++)
- state->cs_input_local_size[i] = this->local_size[i];
+ state->cs_input_local_size[i] = qual_local_size[i];
/* We may now declare the built-in constant gl_WorkGroupSize (see
* builtin_variable_generator::generate_constants() for why we didn't
@@ -7182,7 +7325,7 @@ ast_cs_input_layout::hir(exec_list *instructions,
ir_constant_data data;
memset(&data, 0, sizeof(data));
for (int i = 0; i < 3; i++)
- data.u[i] = this->local_size[i];
+ data.u[i] = qual_local_size[i];
var->constant_value = new(var) ir_constant(glsl_type::uvec3_type, &data);
var->constant_initializer =
new(var) ir_constant(glsl_type::uvec3_type, &data);
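Unspecified local_size dimensions now fold to 1, and the folded sizes feed both the limit checks and the gl_WorkGroupSize constant. A sketch (illustrative compute shader, not from this patch):

    #version 430
    // Declares an 8x8x1 work group; gl_WorkGroupSize becomes the
    // compile-time constant uvec3(8u, 8u, 1u).
    layout(local_size_x = 8, local_size_y = 8) in;
    void main() {}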
@@ -7198,6 +7341,8 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
{
bool gl_FragColor_assigned = false;
bool gl_FragData_assigned = false;
+ bool gl_FragSecondaryColor_assigned = false;
+ bool gl_FragSecondaryData_assigned = false;
bool user_defined_fs_output_assigned = false;
ir_variable *user_defined_fs_output = NULL;
@@ -7215,6 +7360,10 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
gl_FragColor_assigned = true;
else if (strcmp(var->name, "gl_FragData") == 0)
gl_FragData_assigned = true;
+ else if (strcmp(var->name, "gl_SecondaryFragColorEXT") == 0)
+ gl_FragSecondaryColor_assigned = true;
+ else if (strcmp(var->name, "gl_SecondaryFragDataEXT") == 0)
+ gl_FragSecondaryData_assigned = true;
else if (!is_gl_identifier(var->name)) {
if (state->stage == MESA_SHADER_FRAGMENT &&
var->data.mode == ir_var_shader_out) {
@@ -7246,11 +7395,29 @@ detect_conflicting_assignments(struct _mesa_glsl_parse_state *state,
_mesa_glsl_error(&loc, state, "fragment shader writes to both "
"`gl_FragColor' and `%s'",
user_defined_fs_output->name);
+ } else if (gl_FragSecondaryColor_assigned && gl_FragSecondaryData_assigned) {
+ _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+ "`gl_FragSecondaryColorEXT' and"
+ " `gl_FragSecondaryDataEXT'");
+ } else if (gl_FragColor_assigned && gl_FragSecondaryData_assigned) {
+ _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+ "`gl_FragColor' and"
+ " `gl_FragSecondaryDataEXT'");
+ } else if (gl_FragData_assigned && gl_FragSecondaryColor_assigned) {
+ _mesa_glsl_error(&loc, state, "fragment shader writes to both "
+ "`gl_FragData' and"
+ " `gl_FragSecondaryColorEXT'");
} else if (gl_FragData_assigned && user_defined_fs_output_assigned) {
_mesa_glsl_error(&loc, state, "fragment shader writes to both "
"`gl_FragData' and `%s'",
user_defined_fs_output->name);
}
+
+ if ((gl_FragSecondaryColor_assigned || gl_FragSecondaryData_assigned) &&
+ !state->EXT_blend_func_extended_enable) {
+ _mesa_glsl_error(&loc, state,
+ "Dual source blending requires EXT_blend_func_extended");
+ }
}
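The new checks permit the primary/secondary color pairing used by EXT_blend_func_extended dual-source blending while rejecting the mixed combinations listed above. A sketch of the valid pairing (illustrative ES fragment shader, not from this patch):

    #version 100
    #extension GL_EXT_blend_func_extended : require
    precision mediump float;
    void main() {
       gl_FragColor = vec4(1.0);              // blender color input 0
       gl_SecondaryFragColorEXT = vec4(0.5);  // blender color input 1
    }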
diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp
index 79134c19893..03ed4dcfa2a 100644
--- a/src/glsl/ast_type.cpp
+++ b/src/glsl/ast_type.cpp
@@ -38,13 +38,16 @@ ast_type_specifier::print(void) const
}
bool
-ast_fully_specified_type::has_qualifiers() const
+ast_fully_specified_type::has_qualifiers(_mesa_glsl_parse_state *state) const
{
/* 'subroutine' isn't a real qualifier. */
ast_type_qualifier subroutine_only;
subroutine_only.flags.i = 0;
subroutine_only.flags.q.subroutine = 1;
subroutine_only.flags.q.subroutine_def = 1;
+ if (state->has_explicit_uniform_location()) {
+ subroutine_only.flags.q.explicit_index = 1;
+ }
return (this->qualifier.flags.i & ~subroutine_only.flags.i) != 0;
}
@@ -169,41 +172,32 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
}
if (q.flags.q.max_vertices) {
- if (this->flags.q.max_vertices && this->max_vertices != q.max_vertices) {
+ if (this->max_vertices) {
+ this->max_vertices->merge_qualifier(q.max_vertices);
+ } else {
+ this->max_vertices = q.max_vertices;
+ }
+ }
+
+ if (q.flags.q.subroutine_def) {
+ if (this->flags.q.subroutine_def) {
_mesa_glsl_error(loc, state,
- "geometry shader set conflicting max_vertices "
- "(%d and %d)", this->max_vertices, q.max_vertices);
- return false;
+ "conflicting subroutine qualifiers used");
+ } else {
+ this->subroutine_list = q.subroutine_list;
}
- this->max_vertices = q.max_vertices;
}
if (q.flags.q.invocations) {
- if (this->flags.q.invocations && this->invocations != q.invocations) {
- _mesa_glsl_error(loc, state,
- "geometry shader set conflicting invocations "
- "(%d and %d)", this->invocations, q.invocations);
- return false;
+ if (this->invocations) {
+ this->invocations->merge_qualifier(q.invocations);
+ } else {
+ this->invocations = q.invocations;
}
- this->invocations = q.invocations;
}
if (state->stage == MESA_SHADER_GEOMETRY &&
state->has_explicit_attrib_stream()) {
- if (q.flags.q.stream && q.stream >= state->ctx->Const.MaxVertexStreams) {
- _mesa_glsl_error(loc, state,
- "`stream' value is larger than MAX_VERTEX_STREAMS - 1 "
- "(%d > %d)",
- q.stream, state->ctx->Const.MaxVertexStreams - 1);
- }
- if (this->flags.q.explicit_stream &&
- this->stream >= state->ctx->Const.MaxVertexStreams) {
- _mesa_glsl_error(loc, state,
- "`stream' value is larger than MAX_VERTEX_STREAMS - 1 "
- "(%d > %d)",
- this->stream, state->ctx->Const.MaxVertexStreams - 1);
- }
-
if (!this->flags.q.explicit_stream) {
if (q.flags.q.stream) {
this->flags.q.stream = 1;
@@ -222,14 +216,11 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
}
if (q.flags.q.vertices) {
- if (this->flags.q.vertices && this->vertices != q.vertices) {
- _mesa_glsl_error(loc, state,
- "tessellation control shader set conflicting "
- "vertices (%d and %d)",
- this->vertices, q.vertices);
- return false;
+ if (this->vertices) {
+ this->vertices->merge_qualifier(q.vertices);
+ } else {
+ this->vertices = q.vertices;
}
- this->vertices = q.vertices;
}
if (q.flags.q.vertex_spacing) {
@@ -266,15 +257,11 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
for (int i = 0; i < 3; i++) {
if (q.flags.q.local_size & (1 << i)) {
- if ((this->flags.q.local_size & (1 << i)) &&
- this->local_size[i] != q.local_size[i]) {
- _mesa_glsl_error(loc, state,
- "compute shader set conflicting values for "
- "local_size_%c (%d and %d)", 'x' + i,
- this->local_size[i], q.local_size[i]);
- return false;
+ if (this->local_size[i]) {
+ this->local_size[i]->merge_qualifier(q.local_size[i]);
+ } else {
+ this->local_size[i] = q.local_size[i];
}
- this->local_size[i] = q.local_size[i];
}
}
@@ -313,7 +300,7 @@ ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc,
const bool r = this->merge_qualifier(loc, state, q);
if (state->stage == MESA_SHADER_TESS_CTRL) {
- node = new(mem_ctx) ast_tcs_output_layout(*loc, q.vertices);
+ node = new(mem_ctx) ast_tcs_output_layout(*loc);
}
return r;
@@ -417,15 +404,13 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc,
state->in_qualifier->prim_type = q.prim_type;
}
- if (this->flags.q.invocations &&
- q.flags.q.invocations &&
- this->invocations != q.invocations) {
- _mesa_glsl_error(loc, state,
- "conflicting invocations counts specified");
- return false;
- } else if (q.flags.q.invocations) {
+ if (q.flags.q.invocations) {
this->flags.q.invocations = 1;
- this->invocations = q.invocations;
+ if (this->invocations) {
+ this->invocations->merge_qualifier(q.invocations);
+ } else {
+ this->invocations = q.invocations;
+ }
}
if (q.flags.q.early_fragment_tests) {
@@ -468,15 +453,67 @@ ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc,
if (create_gs_ast) {
node = new(mem_ctx) ast_gs_input_layout(*loc, q.prim_type);
} else if (create_cs_ast) {
- /* Infer a local_size of 1 for every unspecified dimension */
- unsigned local_size[3];
- for (int i = 0; i < 3; i++) {
- if (q.flags.q.local_size & (1 << i))
- local_size[i] = q.local_size[i];
- else
- local_size[i] = 1;
+ node = new(mem_ctx) ast_cs_input_layout(*loc, q.local_size);
+ }
+
+ return true;
+}
+
+bool
+ast_layout_expression::process_qualifier_constant(struct _mesa_glsl_parse_state *state,
+                                                  const char *qual_identifier,
+ unsigned *value,
+ bool can_be_zero)
+{
+ int min_value = 0;
+ bool first_pass = true;
+ *value = 0;
+
+ if (!can_be_zero)
+ min_value = 1;
+
+ for (exec_node *node = layout_const_expressions.head;
+ !node->is_tail_sentinel(); node = node->next) {
+
+ exec_list dummy_instructions;
+ ast_node *const_expression = exec_node_data(ast_node, node, link);
+
+ ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state);
+
+ ir_constant *const const_int = ir->constant_expression_value();
+ if (const_int == NULL || !const_int->type->is_integer()) {
+ YYLTYPE loc = const_expression->get_location();
+ _mesa_glsl_error(&loc, state, "%s must be an integral constant "
+ "expression", qual_indentifier);
+ return false;
+ }
+
+ if (const_int->value.i[0] < min_value) {
+ YYLTYPE loc = const_expression->get_location();
+ _mesa_glsl_error(&loc, state, "%s layout qualifier is invalid "
+ "(%d < %d)", qual_indentifier,
+ const_int->value.i[0], min_value);
+ return false;
}
- node = new(mem_ctx) ast_cs_input_layout(*loc, local_size);
+
+ if (!first_pass && *value != const_int->value.u[0]) {
+ YYLTYPE loc = const_expression->get_location();
+ _mesa_glsl_error(&loc, state, "%s layout qualifier does not "
+ "match previous declaration (%d vs %d)",
+                          qual_identifier, *value, const_int->value.i[0]);
+ return false;
+ } else {
+ first_pass = false;
+ *value = const_int->value.u[0];
+ }
+
+      /* If the expression is const (and we've verified that
+       * it is) then no instructions should have been emitted
+       * when we converted it to HIR. If they were emitted,
+       * then either the expression isn't const after all, or
+       * we are emitting unnecessary instructions.
+       */
+ assert(dummy_instructions.is_empty());
}
return true;
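
With ast_layout_expression, a layout-qualifier value is no longer folded at parse
time: each declaration appends its expression to layout_const_expressions, and
process_qualifier_constant() later evaluates every entry, enforces the minimum, and
checks that all redeclarations agree. A minimal consumer-side sketch, mirroring the
set_shader_inout_layout() hunks in glsl_parser_extras.cpp further down:

   unsigned invocations;
   if (state->in_qualifier->invocations->
          process_qualifier_constant(state, "invocations",
                                     &invocations, false)) {
      /* All redeclarations matched and, since can_be_zero was false,
       * the value is >= 1, so it is safe to use.
       */
      shader->Geom.Invocations = invocations;
   }
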
diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 13494446b59..881ee2b6b55 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -290,6 +290,20 @@ texture_multisample_array(const _mesa_glsl_parse_state *state)
}
static bool
+texture_samples_identical(const _mesa_glsl_parse_state *state)
+{
+ return texture_multisample(state) &&
+ state->EXT_shader_samples_identical_enable;
+}
+
+static bool
+texture_samples_identical_array(const _mesa_glsl_parse_state *state)
+{
+ return texture_multisample_array(state) &&
+ state->EXT_shader_samples_identical_enable;
+}
+
+static bool
fs_texture_cube_map_array(const _mesa_glsl_parse_state *state)
{
return state->stage == MESA_SHADER_FRAGMENT &&
@@ -724,6 +738,7 @@ private:
BA2(textureQueryLod);
B1(textureQueryLevels);
+ BA2(textureSamplesIdentical);
B1(dFdx);
B1(dFdy);
B1(fwidth);
@@ -2210,6 +2225,16 @@ builtin_builder::create_builtins()
NULL);
+ add_function("textureSamplesIdenticalEXT",
+ _textureSamplesIdentical(texture_samples_identical, glsl_type::sampler2DMS_type, glsl_type::ivec2_type),
+ _textureSamplesIdentical(texture_samples_identical, glsl_type::isampler2DMS_type, glsl_type::ivec2_type),
+ _textureSamplesIdentical(texture_samples_identical, glsl_type::usampler2DMS_type, glsl_type::ivec2_type),
+
+ _textureSamplesIdentical(texture_samples_identical_array, glsl_type::sampler2DMSArray_type, glsl_type::ivec3_type),
+ _textureSamplesIdentical(texture_samples_identical_array, glsl_type::isampler2DMSArray_type, glsl_type::ivec3_type),
+ _textureSamplesIdentical(texture_samples_identical_array, glsl_type::usampler2DMSArray_type, glsl_type::ivec3_type),
+ NULL);
+
add_function("texture1D",
_texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type),
_texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type),
@@ -3573,7 +3598,16 @@ builtin_builder::_isinf(builtin_available_predicate avail, const glsl_type *type
ir_constant_data infinities;
for (int i = 0; i < type->vector_elements; i++) {
- infinities.f[i] = INFINITY;
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ infinities.f[i] = INFINITY;
+ break;
+ case GLSL_TYPE_DOUBLE:
+ infinities.d[i] = INFINITY;
+ break;
+ default:
+ unreachable("unknown type");
+ }
}
body.emit(ret(equal(abs(x), imm(type, infinities))));
@@ -4675,6 +4709,25 @@ builtin_builder::_textureQueryLevels(const glsl_type *sampler_type)
return sig;
}
+ir_function_signature *
+builtin_builder::_textureSamplesIdentical(builtin_available_predicate avail,
+ const glsl_type *sampler_type,
+ const glsl_type *coord_type)
+{
+ ir_variable *s = in_var(sampler_type, "sampler");
+ ir_variable *P = in_var(coord_type, "P");
+ const glsl_type *return_type = glsl_type::bool_type;
+ MAKE_SIG(return_type, avail, 2, s, P);
+
+ ir_texture *tex = new(mem_ctx) ir_texture(ir_samples_identical);
+ tex->coordinate = var_ref(P);
+ tex->set_sampler(var_ref(s), return_type);
+
+ body.emit(ret(tex));
+
+ return sig;
+}
+
UNOP(dFdx, ir_unop_dFdx, fs_oes_derivatives)
UNOP(dFdxCoarse, ir_unop_dFdx_coarse, fs_derivative_control)
UNOP(dFdxFine, ir_unop_dFdx_fine, fs_derivative_control)
@@ -5243,8 +5296,8 @@ builtin_builder::_image_size_prototype(const glsl_type *image_type,
ir_function_signature *
builtin_builder::_image_samples_prototype(const glsl_type *image_type,
- unsigned num_arguments,
- unsigned flags)
+ unsigned /* num_arguments */,
+ unsigned /* flags */)
{
ir_variable *image = in_var(image_type, "image");
ir_function_signature *sig =
diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index b06c1bc5c12..e8eab808a19 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -327,6 +327,7 @@ per_vertex_accumulator::add_field(int slot, const glsl_type *type,
this->fields[this->num_fields].centroid = 0;
this->fields[this->num_fields].sample = 0;
this->fields[this->num_fields].patch = 0;
+ this->fields[this->num_fields].precision = GLSL_PRECISION_NONE;
this->num_fields++;
}
@@ -376,6 +377,11 @@ private:
return add_variable(name, type, ir_var_shader_out, slot);
}
+ ir_variable *add_index_output(int slot, int index, const glsl_type *type, const char *name)
+ {
+ return add_index_variable(name, type, ir_var_shader_out, slot, index);
+ }
+
ir_variable *add_system_value(int slot, const glsl_type *type,
const char *name)
{
@@ -384,6 +390,8 @@ private:
ir_variable *add_variable(const char *name, const glsl_type *type,
enum ir_variable_mode mode, int slot);
+ ir_variable *add_index_variable(const char *name, const glsl_type *type,
+ enum ir_variable_mode mode, int slot, int index);
ir_variable *add_uniform(const glsl_type *type, const char *name);
ir_variable *add_const(const char *name, int value);
ir_variable *add_const_ivec3(const char *name, int x, int y, int z);
@@ -429,6 +437,46 @@ builtin_variable_generator::builtin_variable_generator(
{
}
+ir_variable *
+builtin_variable_generator::add_index_variable(const char *name,
+ const glsl_type *type,
+ enum ir_variable_mode mode, int slot, int index)
+{
+ ir_variable *var = new(symtab) ir_variable(type, name, mode);
+ var->data.how_declared = ir_var_declared_implicitly;
+
+ switch (var->data.mode) {
+ case ir_var_auto:
+ case ir_var_shader_in:
+ case ir_var_uniform:
+ case ir_var_system_value:
+ var->data.read_only = true;
+ break;
+ case ir_var_shader_out:
+ case ir_var_shader_storage:
+ break;
+ default:
+ /* The only variables that are added using this function should be
+ * uniforms, shader storage, shader inputs, and shader outputs, constants
+ * (which use ir_var_auto), and system values.
+ */
+ assert(0);
+ break;
+ }
+
+ var->data.location = slot;
+ var->data.explicit_location = (slot >= 0);
+ var->data.explicit_index = 1;
+ var->data.index = index;
+
+   /* Once the variable is created and initialized, add it to the symbol table
+ * and add the declaration to the IR stream.
+ */
+ instructions->push_tail(var);
+
+ symtab->add_variable(var);
+ return var;
+}
ir_variable *
builtin_variable_generator::add_variable(const char *name,
@@ -580,6 +628,14 @@ builtin_variable_generator::generate_constants()
add_const("gl_MaxVaryingVectors",
state->ctx->Const.MaxVarying);
}
+
+   /* EXT_blend_func_extended brings a built-in constant
+    * for determining the number of dual-source draw buffers.
+ */
+ if (state->EXT_blend_func_extended_enable) {
+ add_const("gl_MaxDualSourceDrawBuffersEXT",
+ state->Const.MaxDualSourceDrawBuffers);
+ }
} else {
add_const("gl_MaxVertexUniformComponents",
state->Const.MaxVertexUniformComponents);
@@ -1016,6 +1072,19 @@ builtin_variable_generator::generate_fs_special_vars()
array(vec4_t, state->Const.MaxDrawBuffers), "gl_FragData");
}
+ if (state->es_shader && state->language_version == 100 && state->EXT_blend_func_extended_enable) {
+      /* We assume that there will only ever be one dual-source draw buffer.
+       * Assert on that assumption here, since anything more is not handled.
+       * In practice the issue should never arise, as no current hardware
+       * supports more than one.
+       */
+ assert(state->Const.MaxDualSourceDrawBuffers <= 1);
+ add_index_output(FRAG_RESULT_DATA0, 1, vec4_t, "gl_SecondaryFragColorEXT");
+ add_index_output(FRAG_RESULT_DATA0, 1,
+ array(vec4_t, state->Const.MaxDualSourceDrawBuffers),
+ "gl_SecondaryFragDataEXT");
+ }
+
/* gl_FragDepth has always been in desktop GLSL, but did not appear in GLSL
* ES 1.00.
*/
@@ -1186,6 +1255,7 @@ builtin_variable_generator::generate_varyings()
var->data.centroid = fields[i].centroid;
var->data.sample = fields[i].sample;
var->data.patch = fields[i].patch;
+ var->data.precision = fields[i].precision;
var->init_interface_type(per_vertex_out_type);
}
}
diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index 6aa7abec00e..2fd4cf04079 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -2384,6 +2384,8 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
add_builtin_define(parser, "GL_OES_standard_derivatives", 1);
if (extensions->ARB_texture_multisample)
add_builtin_define(parser, "GL_OES_texture_storage_multisample_2d_array", 1);
+ if (extensions->ARB_blend_func_extended)
+ add_builtin_define(parser, "GL_EXT_blend_func_extended", 1);
}
} else {
add_builtin_define(parser, "GL_ARB_draw_buffers", 1);
@@ -2510,6 +2512,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
if (extensions != NULL) {
if (extensions->EXT_shader_integer_mix)
add_builtin_define(parser, "GL_EXT_shader_integer_mix", 1);
+
+ if (extensions->EXT_shader_samples_identical)
+ add_builtin_define(parser, "GL_EXT_shader_samples_identical", 1);
}
if (version >= 150)
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index adf6a05acce..5a8f98019d1 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -298,7 +298,6 @@ static bool match_layout_qualifier(const char *s1, const char *s2,
%type <node> conditionopt
%type <node> for_init_statement
%type <for_rest_statement> for_rest_statement
-%type <n> integer_constant
%type <node> layout_defaults
%right THEN ELSE
@@ -1152,11 +1151,6 @@ layout_qualifier_id_list:
}
;
-integer_constant:
- INTCONSTANT { $$ = $1; }
- | UINTCONSTANT { $$ = $1; }
- ;
-
layout_qualifier_id:
any_identifier
{
@@ -1453,9 +1447,18 @@ layout_qualifier_id:
YYERROR;
}
}
- | any_identifier '=' integer_constant
+ | any_identifier '=' constant_expression
{
memset(& $$, 0, sizeof($$));
+ void *ctx = state;
+
+ if ($3->oper != ast_int_constant &&
+ $3->oper != ast_uint_constant &&
+ !state->has_enhanced_layouts()) {
+ _mesa_glsl_error(& @1, state,
+ "compile-time constant expressions require "
+ "GLSL 4.40 or ARB_enhanced_layouts");
+ }
if (match_layout_qualifier("location", $1, state) == 0) {
$$.flags.q.explicit_location = 1;
@@ -1466,24 +1469,17 @@ layout_qualifier_id:
"GL_ARB_explicit_attrib_location layout "
"identifier `%s' used", $1);
}
-
- if ($3 >= 0) {
- $$.location = $3;
- } else {
- _mesa_glsl_error(& @3, state, "invalid location %d specified", $3);
- YYERROR;
- }
+ $$.location = $3;
}
if (match_layout_qualifier("index", $1, state) == 0) {
- $$.flags.q.explicit_index = 1;
-
- if ($3 >= 0) {
- $$.index = $3;
- } else {
- _mesa_glsl_error(& @3, state, "invalid index %d specified", $3);
+ if (state->es_shader && !state->EXT_blend_func_extended_enable) {
+ _mesa_glsl_error(& @3, state, "index layout qualifier requires EXT_blend_func_extended");
YYERROR;
}
+
+ $$.flags.q.explicit_index = 1;
+ $$.index = $3;
}
if ((state->has_420pack() ||
@@ -1502,18 +1498,11 @@ layout_qualifier_id:
if (match_layout_qualifier("max_vertices", $1, state) == 0) {
$$.flags.q.max_vertices = 1;
-
- if ($3 < 0) {
+ $$.max_vertices = new(ctx) ast_layout_expression(@1, $3);
+ if (!state->is_version(150, 0)) {
_mesa_glsl_error(& @3, state,
- "invalid max_vertices %d specified", $3);
- YYERROR;
- } else {
- $$.max_vertices = $3;
- if (!state->is_version(150, 0)) {
- _mesa_glsl_error(& @3, state,
- "#version 150 max_vertices qualifier "
- "specified", $3);
- }
+ "#version 150 max_vertices qualifier "
+ "specified", $3);
}
}
@@ -1521,15 +1510,8 @@ layout_qualifier_id:
if (match_layout_qualifier("stream", $1, state) == 0 &&
state->check_explicit_attrib_stream_allowed(& @3)) {
$$.flags.q.stream = 1;
-
- if ($3 < 0) {
- _mesa_glsl_error(& @3, state,
- "invalid stream %d specified", $3);
- YYERROR;
- } else {
- $$.flags.q.explicit_stream = 1;
- $$.stream = $3;
- }
+ $$.flags.q.explicit_stream = 1;
+ $$.stream = $3;
}
}
@@ -1541,12 +1523,7 @@ layout_qualifier_id:
for (int i = 0; i < 3; i++) {
if (match_layout_qualifier(local_size_qualifiers[i], $1,
state) == 0) {
- if ($3 <= 0) {
- _mesa_glsl_error(& @3, state,
- "invalid %s of %d specified",
- local_size_qualifiers[i], $3);
- YYERROR;
- } else if (!state->has_compute_shader()) {
+ if (!state->has_compute_shader()) {
_mesa_glsl_error(& @3, state,
"%s qualifier requires GLSL 4.30 or "
"GLSL ES 3.10 or ARB_compute_shader",
@@ -1554,7 +1531,7 @@ layout_qualifier_id:
YYERROR;
} else {
$$.flags.q.local_size |= (1 << i);
- $$.local_size[i] = $3;
+ $$.local_size[i] = new(ctx) ast_layout_expression(@1, $3);
}
break;
}
@@ -1562,48 +1539,24 @@ layout_qualifier_id:
if (match_layout_qualifier("invocations", $1, state) == 0) {
$$.flags.q.invocations = 1;
-
- if ($3 <= 0) {
+ $$.invocations = new(ctx) ast_layout_expression(@1, $3);
+ if (!state->is_version(400, 0) &&
+ !state->ARB_gpu_shader5_enable) {
_mesa_glsl_error(& @3, state,
- "invalid invocations %d specified", $3);
- YYERROR;
- } else if ($3 > MAX_GEOMETRY_SHADER_INVOCATIONS) {
- _mesa_glsl_error(& @3, state,
- "invocations (%d) exceeds "
- "GL_MAX_GEOMETRY_SHADER_INVOCATIONS", $3);
- YYERROR;
- } else {
- $$.invocations = $3;
- if (!state->is_version(400, 0) &&
- !state->ARB_gpu_shader5_enable) {
- _mesa_glsl_error(& @3, state,
- "GL_ARB_gpu_shader5 invocations "
- "qualifier specified", $3);
- }
+ "GL_ARB_gpu_shader5 invocations "
+ "qualifier specified", $3);
}
}
/* Layout qualifiers for tessellation control shaders. */
if (match_layout_qualifier("vertices", $1, state) == 0) {
$$.flags.q.vertices = 1;
-
- if ($3 <= 0) {
- _mesa_glsl_error(& @3, state,
- "invalid vertices (%d) specified", $3);
- YYERROR;
- } else if ($3 > (int)state->Const.MaxPatchVertices) {
- _mesa_glsl_error(& @3, state,
- "vertices (%d) exceeds "
- "GL_MAX_PATCH_VERTICES", $3);
- YYERROR;
- } else {
- $$.vertices = $3;
- if (!state->ARB_tessellation_shader_enable &&
- !state->is_version(400, 0)) {
- _mesa_glsl_error(& @1, state,
- "vertices qualifier requires GLSL 4.00 or "
- "ARB_tessellation_shader");
- }
+ $$.vertices = new(ctx) ast_layout_expression(@1, $3);
+ if (!state->ARB_tessellation_shader_enable &&
+ !state->is_version(400, 0)) {
+ _mesa_glsl_error(& @1, state,
+ "vertices qualifier requires GLSL 4.00 or "
+ "ARB_tessellation_shader");
}
}
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 02584c62a4d..b41b64af2c1 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -104,6 +104,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx,
this->Const.MaxDrawBuffers = ctx->Const.MaxDrawBuffers;
+ this->Const.MaxDualSourceDrawBuffers = ctx->Const.MaxDualSourceDrawBuffers;
+
/* 1.50 constants */
this->Const.MaxVertexOutputComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents;
this->Const.MaxGeometryInputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents;
@@ -646,9 +648,11 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
EXT(AMD_shader_trinary_minmax, true, false, dummy_true),
EXT(AMD_vertex_shader_layer, true, false, AMD_vertex_shader_layer),
EXT(AMD_vertex_shader_viewport_index, true, false, AMD_vertex_shader_viewport_index),
+ EXT(EXT_blend_func_extended, false, true, ARB_blend_func_extended),
EXT(EXT_draw_buffers, false, true, dummy_true),
EXT(EXT_separate_shader_objects, false, true, dummy_true),
EXT(EXT_shader_integer_mix, true, true, EXT_shader_integer_mix),
+ EXT(EXT_shader_samples_identical, true, true, EXT_shader_samples_identical),
EXT(EXT_texture_array, true, false, EXT_texture_array),
};
@@ -1646,8 +1650,20 @@ set_shader_inout_layout(struct gl_shader *shader,
switch (shader->Stage) {
case MESA_SHADER_TESS_CTRL:
shader->TessCtrl.VerticesOut = 0;
- if (state->tcs_output_vertices_specified)
- shader->TessCtrl.VerticesOut = state->out_qualifier->vertices;
+ if (state->tcs_output_vertices_specified) {
+ unsigned vertices;
+ if (state->out_qualifier->vertices->
+ process_qualifier_constant(state, "vertices", &vertices,
+ false)) {
+
+ YYLTYPE loc = state->out_qualifier->vertices->get_location();
+ if (vertices > state->Const.MaxPatchVertices) {
+ _mesa_glsl_error(&loc, state, "vertices (%d) exceeds "
+ "GL_MAX_PATCH_VERTICES", vertices);
+ }
+ shader->TessCtrl.VerticesOut = vertices;
+ }
+ }
break;
case MESA_SHADER_TESS_EVAL:
shader->TessEval.PrimitiveMode = PRIM_UNKNOWN;
@@ -1668,8 +1684,14 @@ set_shader_inout_layout(struct gl_shader *shader,
break;
case MESA_SHADER_GEOMETRY:
shader->Geom.VerticesOut = 0;
- if (state->out_qualifier->flags.q.max_vertices)
- shader->Geom.VerticesOut = state->out_qualifier->max_vertices;
+ if (state->out_qualifier->flags.q.max_vertices) {
+ unsigned qual_max_vertices;
+ if (state->out_qualifier->max_vertices->
+ process_qualifier_constant(state, "max_vertices",
+ &qual_max_vertices, true)) {
+ shader->Geom.VerticesOut = qual_max_vertices;
+ }
+ }
if (state->gs_input_prim_type_specified) {
shader->Geom.InputType = state->in_qualifier->prim_type;
@@ -1684,8 +1706,22 @@ set_shader_inout_layout(struct gl_shader *shader,
}
shader->Geom.Invocations = 0;
- if (state->in_qualifier->flags.q.invocations)
- shader->Geom.Invocations = state->in_qualifier->invocations;
+ if (state->in_qualifier->flags.q.invocations) {
+ unsigned invocations;
+ if (state->in_qualifier->invocations->
+ process_qualifier_constant(state, "invocations",
+ &invocations, false)) {
+
+ YYLTYPE loc = state->in_qualifier->invocations->get_location();
+ if (invocations > MAX_GEOMETRY_SHADER_INVOCATIONS) {
+ _mesa_glsl_error(&loc, state,
+ "invocations (%d) exceeds "
+ "GL_MAX_GEOMETRY_SHADER_INVOCATIONS",
+ invocations);
+ }
+ shader->Geom.Invocations = invocations;
+ }
+ }
break;
case MESA_SHADER_COMPUTE:
@@ -1797,6 +1833,9 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
if (shader->InfoLog)
ralloc_free(shader->InfoLog);
+ if (!state->error)
+ set_shader_inout_layout(shader, state);
+
shader->symbols = new(shader->ir) glsl_symbol_table;
shader->CompileStatus = !state->error;
shader->InfoLog = state->info_log;
@@ -1804,9 +1843,6 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
shader->IsES = state->es_shader;
shader->uses_builtin_functions = state->uses_builtin_functions;
- if (!state->error)
- set_shader_inout_layout(shader, state);
-
/* Retain any live IR, but trash the rest. */
reparent_ir(shader->ir, shader->ir);
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 1d8c1b8799f..17ff0b5af79 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -380,6 +380,9 @@ struct _mesa_glsl_parse_state {
/* ARB_draw_buffers */
unsigned MaxDrawBuffers;
+ /* ARB_blend_func_extended */
+ unsigned MaxDualSourceDrawBuffers;
+
/* 3.00 ES */
int MinProgramTexelOffset;
int MaxProgramTexelOffset;
@@ -595,12 +598,16 @@ struct _mesa_glsl_parse_state {
bool AMD_vertex_shader_layer_warn;
bool AMD_vertex_shader_viewport_index_enable;
bool AMD_vertex_shader_viewport_index_warn;
+ bool EXT_blend_func_extended_enable;
+ bool EXT_blend_func_extended_warn;
bool EXT_draw_buffers_enable;
bool EXT_draw_buffers_warn;
bool EXT_separate_shader_objects_enable;
bool EXT_separate_shader_objects_warn;
bool EXT_shader_integer_mix_enable;
bool EXT_shader_integer_mix_warn;
+ bool EXT_shader_samples_identical_enable;
+ bool EXT_shader_samples_identical_warn;
bool EXT_texture_array_enable;
bool EXT_texture_array_warn;
/*@}*/
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 8933b230177..ca520f547a1 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1421,12 +1421,11 @@ ir_dereference::is_lvalue() const
}
-static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples" };
+static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples", "samples_identical" };
const char *ir_texture::opcode_string()
{
- assert((unsigned int) op <=
- sizeof(tex_opcode_strs) / sizeof(tex_opcode_strs[0]));
+ assert((unsigned int) op < ARRAY_SIZE(tex_opcode_strs));
return tex_opcode_strs[op];
}
@@ -1456,6 +1455,10 @@ ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type)
} else if (this->op == ir_lod) {
assert(type->vector_elements == 2);
assert(type->base_type == GLSL_TYPE_FLOAT);
+ } else if (this->op == ir_samples_identical) {
+ assert(type == glsl_type::bool_type);
+ assert(sampler->type->base_type == GLSL_TYPE_SAMPLER);
+ assert(sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS);
} else {
assert(sampler->type->sampler_type == (int) type->base_type);
if (sampler->type->sampler_shadow)
@@ -1676,6 +1679,7 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name,
this->data.interpolation = INTERP_QUALIFIER_NONE;
this->data.max_array_access = 0;
this->data.atomic.offset = 0;
+ this->data.precision = GLSL_PRECISION_NONE;
this->data.image_read_only = false;
this->data.image_write_only = false;
this->data.image_coherent = false;
@@ -1842,6 +1846,7 @@ ir_function_signature::replace_parameters(exec_list *new_params)
ir_function::ir_function(const char *name)
: ir_instruction(ir_type_function)
{
+ this->subroutine_index = -1;
this->name = ralloc_strdup(this, name);
}
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index d59dee1e369..e1109eec1d3 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1171,6 +1171,8 @@ public:
*/
int num_subroutine_types;
const struct glsl_type **subroutine_types;
+
+ int subroutine_index;
};
inline const char *ir_function_signature::function_name() const
@@ -1965,6 +1967,7 @@ enum ir_texture_opcode {
ir_tg4, /**< Texture gather */
ir_query_levels, /**< Texture levels query */
ir_texture_samples, /**< Texture samples query */
+ ir_samples_identical, /**< Query whether all samples are definitely identical. */
};
@@ -1991,6 +1994,7 @@ enum ir_texture_opcode {
* (lod <type> <sampler> <coordinate>)
* (tg4 <type> <sampler> <coordinate> <offset> <component>)
* (query_levels <type> <sampler>)
+ * (samples_identical <sampler> <coordinate>)
*/
class ir_texture : public ir_rvalue {
public:
diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp
index d6b06eeec87..2aef4fcb4ac 100644
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -223,6 +223,7 @@ ir_texture::clone(void *mem_ctx, struct hash_table *ht) const
case ir_lod:
case ir_query_levels:
case ir_texture_samples:
+ case ir_samples_identical:
break;
case ir_txb:
new_tex->lod_info.bias = this->lod_info.bias->clone(mem_ctx, ht);
@@ -269,6 +270,7 @@ ir_function::clone(void *mem_ctx, struct hash_table *ht) const
ir_function *copy = new(mem_ctx) ir_function(this->name);
copy->is_subroutine = this->is_subroutine;
+ copy->subroutine_index = this->subroutine_index;
copy->num_subroutine_types = this->num_subroutine_types;
copy->subroutine_types = ralloc_array(mem_ctx, const struct glsl_type *, copy->num_subroutine_types);
for (int i = 0; i < copy->num_subroutine_types; i++)
diff --git a/src/glsl/ir_equals.cpp b/src/glsl/ir_equals.cpp
index 5f0785e0ece..b86f4ea16bb 100644
--- a/src/glsl/ir_equals.cpp
+++ b/src/glsl/ir_equals.cpp
@@ -58,8 +58,13 @@ ir_constant::equals(const ir_instruction *ir, enum ir_node_type) const
return false;
for (unsigned i = 0; i < type->components(); i++) {
- if (value.u[i] != other->value.u[i])
- return false;
+ if (type->base_type == GLSL_TYPE_DOUBLE) {
+ if (value.d[i] != other->value.d[i])
+ return false;
+ } else {
+ if (value.u[i] != other->value.u[i])
+ return false;
+ }
}
return true;
@@ -152,6 +157,7 @@ ir_texture::equals(const ir_instruction *ir, enum ir_node_type ignore) const
case ir_lod:
case ir_query_levels:
case ir_texture_samples:
+ case ir_samples_identical:
break;
case ir_txb:
if (!lod_info.bias->equals(other->lod_info.bias, ignore))
diff --git a/src/glsl/ir_hv_accept.cpp b/src/glsl/ir_hv_accept.cpp
index 6495cc4581d..213992af28c 100644
--- a/src/glsl/ir_hv_accept.cpp
+++ b/src/glsl/ir_hv_accept.cpp
@@ -195,6 +195,7 @@ ir_texture::accept(ir_hierarchical_visitor *v)
case ir_lod:
case ir_query_levels:
case ir_texture_samples:
+ case ir_samples_identical:
break;
case ir_txb:
s = this->lod_info.bias->accept(v);
diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp
index 42b03fdea52..fd7bc2eea98 100644
--- a/src/glsl/ir_print_visitor.cpp
+++ b/src/glsl/ir_print_visitor.cpp
@@ -268,6 +268,14 @@ void ir_print_visitor::visit(ir_texture *ir)
{
fprintf(f, "(%s ", ir->opcode_string());
+ if (ir->op == ir_samples_identical) {
+ ir->sampler->accept(this);
+ fprintf(f, " ");
+ ir->coordinate->accept(this);
+ fprintf(f, ")");
+ return;
+ }
+
print_type(f, ir->type);
fprintf(f, " ");
@@ -334,6 +342,8 @@ void ir_print_visitor::visit(ir_texture *ir)
case ir_tg4:
ir->lod_info.component->accept(this);
break;
+ case ir_samples_identical:
+ unreachable(!"ir_samples_identical was already handled");
};
fprintf(f, ")");
}
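
For reference, the early return above means the new opcode is printed without a
result type, so an ir_samples_identical expression dumps along the lines of
(variable names illustrative):

   (samples_identical (var_ref ms_tex) (var_ref coord))
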
diff --git a/src/glsl/ir_rvalue_visitor.cpp b/src/glsl/ir_rvalue_visitor.cpp
index a6966f546bc..6486838b8b8 100644
--- a/src/glsl/ir_rvalue_visitor.cpp
+++ b/src/glsl/ir_rvalue_visitor.cpp
@@ -59,6 +59,7 @@ ir_rvalue_base_visitor::rvalue_visit(ir_texture *ir)
case ir_lod:
case ir_query_levels:
case ir_texture_samples:
+ case ir_samples_identical:
break;
case ir_txb:
handle_rvalue(&ir->lod_info.bias);
diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp
index 7e77a675db1..c0b4b3e820c 100644
--- a/src/glsl/link_varyings.cpp
+++ b/src/glsl/link_varyings.cpp
@@ -766,7 +766,7 @@ public:
gl_shader_stage consumer_stage);
~varying_matches();
void record(ir_variable *producer_var, ir_variable *consumer_var);
- unsigned assign_locations();
+ unsigned assign_locations(uint64_t reserved_slots);
void store_locations() const;
private:
@@ -986,7 +986,7 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
* passed to varying_matches::record().
*/
unsigned
-varying_matches::assign_locations()
+varying_matches::assign_locations(uint64_t reserved_slots)
{
/* Sort varying matches into an order that makes them easy to pack. */
qsort(this->matches, this->num_matches, sizeof(*this->matches),
@@ -1013,6 +1013,10 @@ varying_matches::assign_locations()
!= this->matches[i].packing_class) {
*location = ALIGN(*location, 4);
}
+ while ((*location < MAX_VARYING * 4u) &&
+             (reserved_slots & (1ull << *location / 4u))) {
+ *location = ALIGN(*location + 1, 4);
+ }
this->matches[i].generic_location = *location;
@@ -1376,6 +1380,38 @@ canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode)
}
/**
+ * Generate a bitfield map of the explicit locations for shader varyings.
+ *
+ * In theory a 32-bit value would be enough, but a 64-bit value is future-proof.
+ */
+uint64_t
+reserved_varying_slot(struct gl_shader *stage, ir_variable_mode io_mode)
+{
+ assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out);
+ assert(MAX_VARYING <= 64); /* avoid an overflow of the returned value */
+
+ uint64_t slots = 0;
+ int var_slot;
+
+ if (!stage)
+ return slots;
+
+ foreach_in_list(ir_instruction, node, stage->ir) {
+ ir_variable *const var = node->as_variable();
+
+ if (var == NULL || var->data.mode != io_mode || !var->data.explicit_location)
+ continue;
+
+ var_slot = var->data.location - VARYING_SLOT_VAR0;
+ if (var_slot >= 0 && var_slot < MAX_VARYING)
+         slots |= 1ull << var_slot;
+ }
+
+ return slots;
+}
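
reserved_varying_slot() indexes the mask from VARYING_SLOT_VAR0, one bit per
generic slot, while assign_locations() counts in components (four per slot). A
worked illustration of the new skip loop, with made-up values:

   /* Consumer declares:  layout(location = 2) in vec4 a;  so bit 2 is set: */
   uint64_t reserved_slots = 1ull << 2;

   /* A cursor at component 8 (the start of slot 2) is pushed to slot 3: */
   unsigned location = 8;
   while ((location < MAX_VARYING * 4u) &&
          (reserved_slots & (1ull << location / 4u)))
      location = ALIGN(location + 1, 4);   /* ends at 12 */
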
+
+
+/**
* Assign locations for all variables that are produced in one pipeline stage
* (the "producer") and consumed in the next stage (the "consumer").
*
@@ -1550,7 +1586,11 @@ assign_varying_locations(struct gl_context *ctx,
matches.record(matched_candidate->toplevel_var, NULL);
}
- const unsigned slots_used = matches.assign_locations();
+ const uint64_t reserved_slots =
+ reserved_varying_slot(producer, ir_var_shader_out) |
+ reserved_varying_slot(consumer, ir_var_shader_in);
+
+ const unsigned slots_used = matches.assign_locations(reserved_slots);
matches.store_locations();
for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index db00f8febc6..331d9a28007 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -3864,10 +3864,43 @@ link_assign_subroutine_types(struct gl_shader_program *prog)
sh->SubroutineFunctions[sh->NumSubroutineFunctions].types =
ralloc_array(sh, const struct glsl_type *,
fn->num_subroutine_types);
+
+      /* From Section 4.4.4 (Subroutine Function Layout Qualifiers) of the
+ * GLSL 4.5 spec:
+ *
+ * "Each subroutine with an index qualifier in the shader must be
+ * given a unique index, otherwise a compile or link error will be
+ * generated."
+ */
+ for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) {
+ if (sh->SubroutineFunctions[j].index != -1 &&
+ sh->SubroutineFunctions[j].index == fn->subroutine_index) {
+ linker_error(prog, "each subroutine index qualifier in the "
+ "shader must be unique\n");
+ return;
+ }
+ }
+ sh->SubroutineFunctions[sh->NumSubroutineFunctions].index =
+ fn->subroutine_index;
+
for (int j = 0; j < fn->num_subroutine_types; j++)
sh->SubroutineFunctions[sh->NumSubroutineFunctions].types[j] = fn->subroutine_types[j];
sh->NumSubroutineFunctions++;
}
+
+   /* Assign an index to subroutines without an explicit index. */
+ int index = 0;
+ for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) {
+ while (sh->SubroutineFunctions[j].index == -1) {
+ for (unsigned k = 0; k < sh->NumSubroutineFunctions; k++) {
+ if (sh->SubroutineFunctions[k].index == index)
+ break;
+ else if (k == sh->NumSubroutineFunctions - 1)
+ sh->SubroutineFunctions[j].index = index;
+ }
+ index++;
+ }
+ }
}
}
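
The fallback loop hands each subroutine without an explicit index the lowest index
not already claimed. A standalone illustration of the same scan, using plain ints
in place of gl_subroutine_function (hypothetical values):

   int idx[4] = { 0, -1, 2, -1 };   /* -1 == no explicit index */
   int index = 0;
   for (unsigned j = 0; j < 4; j++) {
      while (idx[j] == -1) {
         for (unsigned k = 0; k < 4; k++) {
            if (idx[k] == index)
               break;                /* index already taken */
            else if (k == 4 - 1)
               idx[j] = index;       /* no one claimed it */
         }
         index++;
      }
   }
   /* Result: idx == { 0, 1, 2, 3 } */
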
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index d8df3544f10..a26300d1d26 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -31,6 +31,7 @@
#include "ir_visitor.h"
#include "ir_hierarchical_visitor.h"
#include "ir.h"
+#include "main/imports.h"
/*
* pass to lower GLSL IR to NIR
@@ -147,16 +148,10 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
nir_lower_outputs_to_temporaries(shader);
- /* TODO: Use _mesa_fls instead */
- unsigned num_textures = 0;
- for (unsigned i = 0; i < 8 * sizeof(sh->Program->SamplersUsed); i++)
- if (sh->Program->SamplersUsed & (1 << i))
- num_textures = i;
-
shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name);
if (shader_prog->Label)
shader->info.label = ralloc_strdup(shader, shader_prog->Label);
- shader->info.num_textures = num_textures;
+ shader->info.num_textures = _mesa_fls(sh->Program->SamplersUsed);
shader->info.num_ubos = sh->NumUniformBlocks;
shader->info.num_abos = shader_prog->NumAtomicBuffers;
shader->info.num_ssbos = sh->NumShaderStorageBlocks;
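
_mesa_fls() ("find last set") returns the one-based position of the highest set
bit (0 for 0), which matches how num_textures is used; note the replaced loop
stored the highest sampler index rather than the count, so this also appears to
fix an off-by-one. A few illustrative checks, assuming the usual definition in
main/imports.h:

   assert(_mesa_fls(0x00000000) == 0);  /* no samplers used            */
   assert(_mesa_fls(0x00000001) == 1);  /* sampler 0 in use -> count 1 */
   assert(_mesa_fls(0x00000109) == 9);  /* highest in-use sampler is 8 */
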
@@ -174,6 +169,10 @@ glsl_to_nir(const struct gl_shader_program *shader_prog,
shader_prog->TransformFeedback.NumVarying > 0;
switch (stage) {
+ case MESA_SHADER_TESS_CTRL:
+ shader->info.tcs.vertices_out = shader_prog->TessCtrl.VerticesOut;
+ break;
+
case MESA_SHADER_GEOMETRY:
shader->info.gs.vertices_in = shader_prog->Geom.VerticesIn;
shader->info.gs.output_primitive = sh->Geom.OutputType;
@@ -244,6 +243,8 @@ constant_copy(ir_constant *ir, void *mem_ctx)
unsigned total_elems = ir->type->components();
unsigned i;
+
+ ret->num_elements = 0;
switch (ir->type->base_type) {
case GLSL_TYPE_UINT:
for (i = 0; i < total_elems; i++)
@@ -268,6 +269,8 @@ constant_copy(ir_constant *ir, void *mem_ctx)
case GLSL_TYPE_STRUCT:
ret->elements = ralloc_array(mem_ctx, nir_constant *,
ir->type->length);
+ ret->num_elements = ir->type->length;
+
i = 0;
foreach_in_list(ir_constant, field, &ir->components) {
ret->elements[i] = constant_copy(field, mem_ctx);
@@ -278,6 +281,7 @@ constant_copy(ir_constant *ir, void *mem_ctx)
case GLSL_TYPE_ARRAY:
ret->elements = ralloc_array(mem_ctx, nir_constant *,
ir->type->length);
+ ret->num_elements = ir->type->length;
for (i = 0; i < ir->type->length; i++)
ret->elements[i] = constant_copy(ir->array_elements[i], mem_ctx);
@@ -297,15 +301,6 @@ nir_visitor::visit(ir_variable *ir)
var->type = ir->type;
var->name = ralloc_strdup(var, ir->name);
- if (ir->is_interface_instance() && ir->get_max_ifc_array_access() != NULL) {
- unsigned size = ir->get_interface_type()->length;
- var->max_ifc_array_access = ralloc_array(var, unsigned, size);
- memcpy(var->max_ifc_array_access, ir->get_max_ifc_array_access(),
- size * sizeof(unsigned));
- } else {
- var->max_ifc_array_access = NULL;
- }
-
var->data.read_only = ir->data.read_only;
var->data.centroid = ir->data.centroid;
var->data.sample = ir->data.sample;
@@ -1543,9 +1538,9 @@ nir_visitor::visit(ir_expression *ir)
result = supports_ints ? nir_ior(&b, srcs[0], srcs[1])
: nir_for(&b, srcs[0], srcs[1]);
break;
- case ir_binop_logic_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break;
- result = supports_ints ? nir_ior(&b, srcs[0], srcs[1])
- : nir_for(&b, srcs[0], srcs[1]);
+ case ir_binop_logic_xor:
+ result = supports_ints ? nir_ixor(&b, srcs[0], srcs[1])
+ : nir_fxor(&b, srcs[0], srcs[1]);
break;
case ir_binop_lshift: result = nir_ishl(&b, srcs[0], srcs[1]); break;
case ir_binop_rshift:
@@ -1808,6 +1803,11 @@ nir_visitor::visit(ir_texture *ir)
num_srcs = 0;
break;
+ case ir_samples_identical:
+ op = nir_texop_samples_identical;
+ num_srcs = 1; /* coordinate */
+ break;
+
default:
unreachable("not reached");
}
@@ -1835,8 +1835,9 @@ nir_visitor::visit(ir_texture *ir)
case GLSL_TYPE_INT:
instr->dest_type = nir_type_int;
break;
+ case GLSL_TYPE_BOOL:
case GLSL_TYPE_UINT:
- instr->dest_type = nir_type_unsigned;
+ instr->dest_type = nir_type_uint;
break;
default:
unreachable("not reached");
diff --git a/src/glsl/nir/glsl_types.cpp b/src/glsl/nir/glsl_types.cpp
index 3e9d38f7707..64b5c0cb106 100644
--- a/src/glsl/nir/glsl_types.cpp
+++ b/src/glsl/nir/glsl_types.cpp
@@ -130,6 +130,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
this->fields.structure[i].image_coherent = fields[i].image_coherent;
this->fields.structure[i].image_volatile = fields[i].image_volatile;
this->fields.structure[i].image_restrict = fields[i].image_restrict;
+ this->fields.structure[i].precision = fields[i].precision;
}
mtx_unlock(&glsl_type::mutex);
diff --git a/src/glsl/nir/glsl_types.h b/src/glsl/nir/glsl_types.h
index 14c2aa49f85..1aafa5cd547 100644
--- a/src/glsl/nir/glsl_types.h
+++ b/src/glsl/nir/glsl_types.h
@@ -858,7 +858,7 @@ struct glsl_struct_field {
/**
* Precision qualifier
*/
- unsigned precision;
+ unsigned precision:2;
/**
* Image qualifiers, applicable to buffer variables defined in shader
@@ -873,7 +873,8 @@ struct glsl_struct_field {
#ifdef __cplusplus
glsl_struct_field(const struct glsl_type *_type, const char *_name)
: type(_type), name(_name), location(-1), interpolation(0), centroid(0),
- sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0)
+ sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0),
+ precision(GLSL_PRECISION_NONE)
{
/* empty */
}
diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 3157ff82d99..79df6d3df94 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -107,6 +107,10 @@ void
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
{
switch (var->data.mode) {
+ case nir_var_all:
+ assert(!"invalid mode");
+ break;
+
case nir_var_local:
assert(!"nir_shader_add_variable cannot be used for local variables");
break;
@@ -312,6 +316,14 @@ nir_block_create(nir_shader *shader)
block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
_mesa_key_pointer_equal);
block->imm_dom = NULL;
+   /* XXX maybe it would be worth it to defer allocation? This
+    * way it doesn't get allocated for shader refs that never run
+    * nir_calc_dominance? For example, the state tracker creates an
+    * initial IR, clones that, runs the appropriate lowering passes,
+    * and passes it to the driver, which does common lowering/opt and
+    * then stores a ref that is later used to do state-specific
+    * lowering and further opt. Do any of the references not need
+    * dominance metadata?
+    */
block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
_mesa_key_pointer_equal);
@@ -1306,21 +1318,62 @@ nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
{
assert(!new_src.is_ssa || def != new_src.ssa);
- nir_foreach_use_safe(def, use_src) {
- nir_instr *src_parent_instr = use_src->parent_instr;
- list_del(&use_src->use_link);
- nir_src_copy(use_src, &new_src, src_parent_instr);
- src_add_all_uses(use_src, src_parent_instr, NULL);
- }
+ nir_foreach_use_safe(def, use_src)
+ nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
+
+ nir_foreach_if_use_safe(def, use_src)
+ nir_if_rewrite_condition(use_src->parent_if, new_src);
+}
+
+static bool
+is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
+{
+ assert(start->block == end->block);
+
+ if (between->block != start->block)
+ return false;
+
+ /* Search backwards looking for "between" */
+ while (start != end) {
+ if (between == end)
+ return true;
- nir_foreach_if_use_safe(def, use_src) {
- nir_if *src_parent_if = use_src->parent_if;
- list_del(&use_src->use_link);
- nir_src_copy(use_src, &new_src, src_parent_if);
- src_add_all_uses(use_src, NULL, src_parent_if);
+ end = nir_instr_prev(end);
+ assert(end);
}
+
+ return false;
}
+/* Replaces all uses of the given SSA def with the given source, but only if
+ * the use comes after the after_me instruction.  This is useful when
+ * emitting code to fix up the result of some instruction: the fixup code
+ * can freely use the original result, and a final call to
+ * nir_ssa_def_rewrite_uses_after with the last fixup instruction as
+ * after_me redirects every other use without touching the fixup code
+ * itself.
+ *
+ * This function assumes that after_me is in the same block as
+ * def->parent_instr and that after_me comes after def->parent_instr.
+ */
+void
+nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
+ nir_instr *after_me)
+{
+ assert(!new_src.is_ssa || def != new_src.ssa);
+
+ nir_foreach_use_safe(def, use_src) {
+ assert(use_src->parent_instr != def->parent_instr);
+ /* Since def already dominates all of its uses, the only way a use can
+ * not be dominated by after_me is if it is between def and after_me in
+ * the instruction list.
+ */
+ if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
+ nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
+ }
+
+ nir_foreach_if_use_safe(def, use_src)
+ nir_if_rewrite_condition(use_src->parent_if, new_src);
+}
static bool foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb,
bool reverse, void *state);
@@ -1571,6 +1624,8 @@ nir_intrinsic_from_system_value(gl_system_value val)
return nir_intrinsic_load_tess_level_inner;
case SYSTEM_VALUE_VERTICES_IN:
return nir_intrinsic_load_patch_vertices_in;
+ case SYSTEM_VALUE_HELPER_INVOCATION:
+ return nir_intrinsic_load_helper_invocation;
default:
unreachable("system value does not directly correspond to intrinsic");
}
@@ -1614,6 +1669,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
return SYSTEM_VALUE_TESS_LEVEL_INNER;
case nir_intrinsic_load_patch_vertices_in:
return SYSTEM_VALUE_VERTICES_IN;
+ case nir_intrinsic_load_helper_invocation:
+ return SYSTEM_VALUE_HELPER_INVOCATION;
default:
unreachable("intrinsic doesn't produce a system value");
}
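
A typical caller of nir_ssa_def_rewrite_uses_after() appends fixup code right
after the instruction it patches; the texture-swizzle lowering added to
nir_lower_tex.c in this same series is one such case. A hedged sketch of that
pattern (the swizzle values are illustrative):

   /* Emit a swizzle that consumes the tex result, then point every
    * other use of the result at the swizzled value:
    */
   b->cursor = nir_after_instr(&tex->instr);

   unsigned swiz[4] = { 2, 1, 0, 3 };
   nir_ssa_def *swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false);

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa,
                                  nir_src_for_ssa(swizzled),
                                  swizzled->parent_instr);
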
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index df0e6f1f54a..b7374e17407 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -82,6 +82,7 @@ typedef struct {
} nir_state_slot;
typedef enum {
+ nir_var_all = -1,
nir_var_shader_in,
nir_var_shader_out,
nir_var_global,
@@ -111,6 +112,11 @@ typedef struct nir_constant {
*/
union nir_constant_data value;
+   /* We could get this from var->type, but not having to care about the
+    * type makes clone *much* easier.
+    */
+ unsigned num_elements;
+
/* Array elements / Structure Fields */
struct nir_constant **elements;
} nir_constant;
@@ -147,19 +153,6 @@ typedef struct {
*/
char *name;
- /**
- * For variables which satisfy the is_interface_instance() predicate, this
- * points to an array of integers such that if the ith member of the
- * interface block is an array, max_ifc_array_access[i] is the maximum
- * array element of that member that has been accessed. If the ith member
- * of the interface block is not an array, max_ifc_array_access[i] is
- * unused.
- *
- * For variables whose type is not an interface block, this pointer is
- * NULL.
- */
- unsigned *max_ifc_array_access;
-
struct nir_variable_data {
/**
@@ -654,7 +647,7 @@ typedef enum {
nir_type_invalid = 0, /* Not a valid type */
nir_type_float,
nir_type_int,
- nir_type_unsigned,
+ nir_type_uint,
nir_type_bool
} nir_alu_type;
@@ -977,6 +970,9 @@ typedef enum {
nir_texop_tg4, /**< Texture gather */
nir_texop_query_levels, /**< Texture levels query */
nir_texop_texture_samples, /**< Texture samples query */
+ nir_texop_samples_identical, /**< Query whether all samples are definitely
+ * identical.
+ */
} nir_texop;
typedef struct {
@@ -1069,6 +1065,7 @@ nir_tex_instr_dest_size(nir_tex_instr *instr)
case nir_texop_texture_samples:
case nir_texop_query_levels:
+ case nir_texop_samples_identical:
return 1;
default:
@@ -1079,6 +1076,31 @@ nir_tex_instr_dest_size(nir_tex_instr *instr)
}
}
+/* Returns true if this texture operation queries something about the texture
+ * rather than actually sampling it.
+ */
+static inline bool
+nir_tex_instr_is_query(nir_tex_instr *instr)
+{
+ switch (instr->op) {
+ case nir_texop_txs:
+ case nir_texop_lod:
+ case nir_texop_texture_samples:
+ case nir_texop_query_levels:
+ return true;
+ case nir_texop_tex:
+ case nir_texop_txb:
+ case nir_texop_txl:
+ case nir_texop_txd:
+ case nir_texop_txf:
+ case nir_texop_txf_ms:
+ case nir_texop_tg4:
+ return false;
+ default:
+ unreachable("Invalid texture opcode");
+ }
+}
+
static inline unsigned
nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src)
{
@@ -1353,6 +1375,7 @@ typedef enum {
nir_metadata_block_index = 0x1,
nir_metadata_dominance = 0x2,
nir_metadata_live_ssa_defs = 0x4,
+ nir_metadata_not_properly_reset = 0x8,
} nir_metadata;
typedef struct {
@@ -1578,6 +1601,11 @@ typedef struct nir_shader_info {
struct {
unsigned local_size[3];
} cs;
+
+ struct {
+ /** The number of vertices in the TCS output patch. */
+ unsigned vertices_out;
+ } tcs;
};
} nir_shader_info;
@@ -1910,6 +1938,8 @@ void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
unsigned num_components, const char *name);
void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src);
+void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
+ nir_instr *after_me);
/* visits basic blocks in source-code order */
typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state);
@@ -1937,10 +1967,16 @@ void nir_index_blocks(nir_function_impl *impl);
void nir_print_shader(nir_shader *shader, FILE *fp);
void nir_print_instr(const nir_instr *instr, FILE *fp);
+nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s);
+
#ifdef DEBUG
void nir_validate_shader(nir_shader *shader);
+void nir_metadata_set_validation_flag(nir_shader *shader);
+void nir_metadata_check_validation_flag(nir_shader *shader);
#else
static inline void nir_validate_shader(nir_shader *shader) { (void) shader; }
+static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; }
+static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; }
#endif /* DEBUG */
void nir_calc_dominance_impl(nir_function_impl *impl);
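
The new not_properly_reset bit exists so validation can catch passes that mutate
the IR but never declare what metadata they kept. The intended debug flow,
sketched (nir_metadata_preserve() is not part of this excerpt and is assumed
from nir_metadata.c; the pass name is hypothetical):

   nir_metadata_set_validation_flag(shader);   /* taint all metadata */
   some_lowering_pass(shader);                 /* hypothetical pass  */
   /* A well-behaved pass calls something like:
    *    nir_metadata_preserve(impl, nir_metadata_block_index |
    *                                nir_metadata_dominance);
    * which clears the taint; otherwise the check below asserts.
    */
   nir_metadata_check_validation_flag(shader);
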
@@ -2032,9 +2068,22 @@ typedef struct nir_lower_tex_options {
unsigned saturate_s;
unsigned saturate_t;
unsigned saturate_r;
+
+ /* Bitmask of samplers that need swizzling.
+ *
+ * If (swizzle_result & (1 << sampler_index)), then the swizzle in
+ * swizzles[sampler_index] is applied to the result of the texturing
+ * operation.
+ */
+ unsigned swizzle_result;
+
+ /* A swizzle for each sampler. Values 0-3 represent x, y, z, or w swizzles
+ * while 4 and 5 represent 0 and 1 respectively.
+ */
+ uint8_t swizzles[32][4];
} nir_lower_tex_options;
-void nir_lower_tex(nir_shader *shader,
+bool nir_lower_tex(nir_shader *shader,
const nir_lower_tex_options *options);
void nir_lower_idiv(nir_shader *shader);
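
Filling in the new fields from a driver, sketched (the sampler index and swizzle
values are made up; 0-3 select .xyzw while 4/5 produce the constants 0/1, as
described above):

   nir_lower_tex_options opts;
   memset(&opts, 0, sizeof(opts));

   opts.swizzle_result = 1 << 0;   /* swizzle only sampler 0 */
   opts.swizzles[0][0] = 2;        /* result.x <- tex.z */
   opts.swizzles[0][1] = 1;        /* result.y <- tex.y */
   opts.swizzles[0][2] = 0;        /* result.z <- tex.x */
   opts.swizzles[0][3] = 3;        /* result.w <- tex.w */

   bool progress = nir_lower_tex(shader, &opts);   /* now reports progress */
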
diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h
index 205aa067b0b..fe41c74b608 100644
--- a/src/glsl/nir/nir_builder.h
+++ b/src/glsl/nir/nir_builder.h
@@ -256,7 +256,7 @@ nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4],
{
nir_alu_src alu_src = { NIR_SRC_INIT };
alu_src.src = nir_src_for_ssa(src);
- for (int i = 0; i < 4; i++)
+ for (unsigned i = 0; i < num_components; i++)
alu_src.swizzle[i] = swiz[i];
return use_fmov ? nir_fmov_alu(build, alu_src, num_components) :
@@ -290,6 +290,8 @@ nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
/**
* Turns a nir_src into a nir_ssa_def * so it can be passed to
* nir_build_alu()-based builder calls.
+ *
+ * See nir_ssa_for_alu_src() for alu instructions.
*/
static inline nir_ssa_def *
nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)
@@ -305,6 +307,25 @@ nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)
return nir_imov_alu(build, alu, num_components);
}
+/**
+ * Similar to nir_ssa_for_src(), but for alu src's, respecting the
+ * nir_alu_src's swizzle.
+ */
+static inline nir_ssa_def *
+nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr *instr, unsigned srcn)
+{
+ static uint8_t trivial_swizzle[4] = { 0, 1, 2, 3 };
+ nir_alu_src *src = &instr->src[srcn];
+ unsigned num_components = nir_ssa_alu_instr_src_components(instr, srcn);
+
+ if (src->src.is_ssa && (src->src.ssa->num_components == num_components) &&
+ !src->abs && !src->negate &&
+ (memcmp(src->swizzle, trivial_swizzle, num_components) == 0))
+ return src->src.ssa;
+
+ return nir_imov_alu(build, *src, num_components);
+}
+
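
nir_ssa_for_alu_src() gives a lowering pass a plain SSA view of an ALU operand,
emitting an imov only when a swizzle or modifier is in the way. A brief sketch of
the intended use (an idiv-style lowering is assumed; compare the nir_lower_idiv.c
changes in the diffstat):

   /* inside a pass visiting a nir_alu_instr *alu, with nir_builder *b: */
   nir_ssa_def *numer = nir_ssa_for_alu_src(b, alu, 0);
   nir_ssa_def *denom = nir_ssa_for_alu_src(b, alu, 1);
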
static inline nir_ssa_def *
nir_load_var(nir_builder *build, nir_variable *var)
{
diff --git a/src/glsl/nir/nir_clone.c b/src/glsl/nir/nir_clone.c
new file mode 100644
index 00000000000..68b72ef5381
--- /dev/null
+++ b/src/glsl/nir/nir_clone.c
@@ -0,0 +1,674 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_control_flow_private.h"
+
+/* Secret Decoder Ring:
+ * clone_foo():
+ * Allocate and clone a foo.
+ * __clone_foo():
+ *        Clone body of foo (i.e. parent class, embedded struct, etc.)
+ */
+
+typedef struct {
+ /* maps orig ptr -> cloned ptr: */
+ struct hash_table *ptr_table;
+
+ /* List of phi sources. */
+ struct list_head phi_srcs;
+
+ /* new shader object, used as memctx for just about everything else: */
+ nir_shader *ns;
+} clone_state;
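
Everything below hangs off clone_state: each clone_foo() registers its new object
with store_ptr() so that later references (sources, derefs, phi sources) can be
remapped via lookup_ptr(). The top-level nir_shader_clone() falls past the end of
this excerpt; a rough sketch of its assumed shape:

   clone_state state;
   init_clone_state(&state);

   /* assumed: create the new shader first so it can serve as memctx */
   nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options);
   state.ns = ns;

   clone_var_list(&state, &ns->uniforms, &s->uniforms);
   clone_var_list(&state, &ns->inputs,   &s->inputs);
   clone_var_list(&state, &ns->outputs,  &s->outputs);
   /* ... registers, functions, then fix up deferred phi sources ... */

   free_clone_state(&state);
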
+
+static void
+init_clone_state(clone_state *state)
+{
+ state->ptr_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ list_inithead(&state->phi_srcs);
+}
+
+static void
+free_clone_state(clone_state *state)
+{
+ _mesa_hash_table_destroy(state->ptr_table, NULL);
+}
+
+static void *
+lookup_ptr(clone_state *state, const void *ptr)
+{
+ struct hash_entry *entry;
+
+ if (!ptr)
+ return NULL;
+
+ entry = _mesa_hash_table_search(state->ptr_table, ptr);
+ assert(entry && "Failed to find pointer!");
+ if (!entry)
+ return NULL;
+
+ return entry->data;
+}
+
+static void
+store_ptr(clone_state *state, void *nptr, const void *ptr)
+{
+ _mesa_hash_table_insert(state->ptr_table, ptr, nptr);
+}
+
+static nir_constant *
+clone_constant(clone_state *state, const nir_constant *c, nir_variable *nvar)
+{
+ nir_constant *nc = ralloc(nvar, nir_constant);
+
+ nc->value = c->value;
+ nc->num_elements = c->num_elements;
+ nc->elements = ralloc_array(nvar, nir_constant *, c->num_elements);
+ for (unsigned i = 0; i < c->num_elements; i++) {
+ nc->elements[i] = clone_constant(state, c->elements[i], nvar);
+ }
+
+ return nc;
+}
+
+/* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid
+ * having to deal with locals and globals separately:
+ */
+static nir_variable *
+clone_variable(clone_state *state, const nir_variable *var)
+{
+ nir_variable *nvar = rzalloc(state->ns, nir_variable);
+ store_ptr(state, nvar, var);
+
+ nvar->type = var->type;
+ nvar->name = ralloc_strdup(nvar, var->name);
+ nvar->data = var->data;
+ nvar->num_state_slots = var->num_state_slots;
+ nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots);
+ memcpy(nvar->state_slots, var->state_slots,
+ var->num_state_slots * sizeof(nir_state_slot));
+ if (var->constant_initializer) {
+ nvar->constant_initializer =
+ clone_constant(state, var->constant_initializer, nvar);
+ }
+ nvar->interface_type = var->interface_type;
+
+ return nvar;
+}
+
+/* clone list of nir_variable: */
+static void
+clone_var_list(clone_state *state, struct exec_list *dst,
+ const struct exec_list *list)
+{
+ exec_list_make_empty(dst);
+ foreach_list_typed(nir_variable, var, node, list) {
+ nir_variable *nvar = clone_variable(state, var);
+ exec_list_push_tail(dst, &nvar->node);
+ }
+}
+
+/* NOTE: for cloning nir_register's, bypass nir_global/local_reg_create()
+ * to avoid having to deal with locals and globals separately:
+ */
+static nir_register *
+clone_register(clone_state *state, const nir_register *reg)
+{
+ nir_register *nreg = rzalloc(state->ns, nir_register);
+ store_ptr(state, nreg, reg);
+
+ nreg->num_components = reg->num_components;
+ nreg->num_array_elems = reg->num_array_elems;
+ nreg->index = reg->index;
+ nreg->name = ralloc_strdup(nreg, reg->name);
+ nreg->is_global = reg->is_global;
+ nreg->is_packed = reg->is_packed;
+
+ /* reconstructing uses/defs/if_uses handled by nir_instr_insert() */
+ list_inithead(&nreg->uses);
+ list_inithead(&nreg->defs);
+ list_inithead(&nreg->if_uses);
+
+ return nreg;
+}
+
+/* clone list of nir_register: */
+static void
+clone_reg_list(clone_state *state, struct exec_list *dst,
+ const struct exec_list *list)
+{
+ exec_list_make_empty(dst);
+ foreach_list_typed(nir_register, reg, node, list) {
+ nir_register *nreg = clone_register(state, reg);
+ exec_list_push_tail(dst, &nreg->node);
+ }
+}
+
+static void
+__clone_src(clone_state *state, void *ninstr_or_if,
+ nir_src *nsrc, const nir_src *src)
+{
+ nsrc->is_ssa = src->is_ssa;
+ if (src->is_ssa) {
+ nsrc->ssa = lookup_ptr(state, src->ssa);
+ } else {
+ nsrc->reg.reg = lookup_ptr(state, src->reg.reg);
+ if (src->reg.indirect) {
+ nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src);
+ __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect);
+ }
+ nsrc->reg.base_offset = src->reg.base_offset;
+ }
+}
+
+static void
+__clone_dst(clone_state *state, nir_instr *ninstr,
+ nir_dest *ndst, const nir_dest *dst)
+{
+ ndst->is_ssa = dst->is_ssa;
+ if (dst->is_ssa) {
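+ /* an SSA destination makes a brand new def; register it so that
+ * sources cloned later can find it:
+ */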
+ nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name);
+ store_ptr(state, &ndst->ssa, &dst->ssa);
+ } else {
+ ndst->reg.reg = lookup_ptr(state, dst->reg.reg);
+ if (dst->reg.indirect) {
+ ndst->reg.indirect = ralloc(ninstr, nir_src);
+ __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect);
+ }
+ ndst->reg.base_offset = dst->reg.base_offset;
+ }
+}
+
+static nir_deref *clone_deref(clone_state *state, const nir_deref *deref,
+ nir_instr *ninstr, nir_deref *parent);
+
+static nir_deref_var *
+clone_deref_var(clone_state *state, const nir_deref_var *dvar,
+ nir_instr *ninstr)
+{
+ nir_variable *nvar = lookup_ptr(state, dvar->var);
+ nir_deref_var *ndvar = nir_deref_var_create(ninstr, nvar);
+
+ if (dvar->deref.child)
+ ndvar->deref.child = clone_deref(state, dvar->deref.child,
+ ninstr, &ndvar->deref);
+
+ return ndvar;
+}
+
+static nir_deref_array *
+clone_deref_array(clone_state *state, const nir_deref_array *darr,
+ nir_instr *ninstr, nir_deref *parent)
+{
+ nir_deref_array *ndarr = nir_deref_array_create(parent);
+
+ ndarr->deref.type = darr->deref.type;
+ if (darr->deref.child)
+ ndarr->deref.child = clone_deref(state, darr->deref.child,
+ ninstr, &ndarr->deref);
+
+ ndarr->deref_array_type = darr->deref_array_type;
+ ndarr->base_offset = darr->base_offset;
+ if (ndarr->deref_array_type == nir_deref_array_type_indirect)
+ __clone_src(state, ninstr, &ndarr->indirect, &darr->indirect);
+
+ return ndarr;
+}
+
+static nir_deref_struct *
+clone_deref_struct(clone_state *state, const nir_deref_struct *dstr,
+ nir_instr *ninstr, nir_deref *parent)
+{
+ nir_deref_struct *ndstr = nir_deref_struct_create(parent, dstr->index);
+
+ ndstr->deref.type = dstr->deref.type;
+ if (dstr->deref.child)
+ ndstr->deref.child = clone_deref(state, dstr->deref.child,
+ ninstr, &ndstr->deref);
+
+ return ndstr;
+}
+
+static nir_deref *
+clone_deref(clone_state *state, const nir_deref *dref,
+ nir_instr *ninstr, nir_deref *parent)
+{
+ switch (dref->deref_type) {
+ case nir_deref_type_array:
+ return &clone_deref_array(state, nir_deref_as_array(dref),
+ ninstr, parent)->deref;
+ case nir_deref_type_struct:
+ return &clone_deref_struct(state, nir_deref_as_struct(dref),
+ ninstr, parent)->deref;
+ default:
+ unreachable("bad deref type");
+ return NULL;
+ }
+}
+
+static nir_alu_instr *
+clone_alu(clone_state *state, const nir_alu_instr *alu)
+{
+ nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op);
+
+ __clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest);
+ nalu->dest.saturate = alu->dest.saturate;
+ nalu->dest.write_mask = alu->dest.write_mask;
+
+ for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
+ __clone_src(state, &nalu->instr, &nalu->src[i].src, &alu->src[i].src);
+ nalu->src[i].negate = alu->src[i].negate;
+ nalu->src[i].abs = alu->src[i].abs;
+ memcpy(nalu->src[i].swizzle, alu->src[i].swizzle,
+ sizeof(nalu->src[i].swizzle));
+ }
+
+ return nalu;
+}
+
+static nir_intrinsic_instr *
+clone_intrinsic(clone_state *state, const nir_intrinsic_instr *itr)
+{
+ nir_intrinsic_instr *nitr =
+ nir_intrinsic_instr_create(state->ns, itr->intrinsic);
+
+ unsigned num_variables = nir_intrinsic_infos[itr->intrinsic].num_variables;
+ unsigned num_srcs = nir_intrinsic_infos[itr->intrinsic].num_srcs;
+
+ if (nir_intrinsic_infos[itr->intrinsic].has_dest)
+ __clone_dst(state, &nitr->instr, &nitr->dest, &itr->dest);
+
+ nitr->num_components = itr->num_components;
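+ /* const_index is interpreted per-intrinsic but is always plain data,
+ * so a wholesale copy is safe:
+ */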
+ memcpy(nitr->const_index, itr->const_index, sizeof(nitr->const_index));
+
+ for (unsigned i = 0; i < num_variables; i++) {
+ nitr->variables[i] = clone_deref_var(state, itr->variables[i],
+ &nitr->instr);
+ }
+
+ for (unsigned i = 0; i < num_srcs; i++)
+ __clone_src(state, &nitr->instr, &nitr->src[i], &itr->src[i]);
+
+ return nitr;
+}
+
+static nir_load_const_instr *
+clone_load_const(clone_state *state, const nir_load_const_instr *lc)
+{
+ nir_load_const_instr *nlc =
+ nir_load_const_instr_create(state->ns, lc->def.num_components);
+
+ memcpy(&nlc->value, &lc->value, sizeof(nlc->value));
+
+ store_ptr(state, &nlc->def, &lc->def);
+
+ return nlc;
+}
+
+static nir_ssa_undef_instr *
+clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa)
+{
+ nir_ssa_undef_instr *nsa =
+ nir_ssa_undef_instr_create(state->ns, sa->def.num_components);
+
+ store_ptr(state, &nsa->def, &sa->def);
+
+ return nsa;
+}
+
+static nir_tex_instr *
+clone_tex(clone_state *state, const nir_tex_instr *tex)
+{
+ nir_tex_instr *ntex = nir_tex_instr_create(state->ns, tex->num_srcs);
+
+ ntex->sampler_dim = tex->sampler_dim;
+ ntex->dest_type = tex->dest_type;
+ ntex->op = tex->op;
+ __clone_dst(state, &ntex->instr, &ntex->dest, &tex->dest);
+ for (unsigned i = 0; i < ntex->num_srcs; i++) {
+ ntex->src[i].src_type = tex->src[i].src_type;
+ __clone_src(state, &ntex->instr, &ntex->src[i].src, &tex->src[i].src);
+ }
+ ntex->coord_components = tex->coord_components;
+ ntex->is_array = tex->is_array;
+ ntex->is_shadow = tex->is_shadow;
+ ntex->is_new_style_shadow = tex->is_new_style_shadow;
+ memcpy(ntex->const_offset, tex->const_offset, sizeof(ntex->const_offset));
+ ntex->component = tex->component;
+ ntex->texture_index = tex->texture_index;
+ ntex->texture_array_size = tex->texture_array_size;
+ if (tex->texture)
+ ntex->texture = clone_deref_var(state, tex->texture, &ntex->instr);
+ ntex->sampler_index = tex->sampler_index;
+ if (tex->sampler)
+ ntex->sampler = clone_deref_var(state, tex->sampler, &ntex->instr);
+
+ return ntex;
+}
+
+static nir_phi_instr *
+clone_phi(clone_state *state, const nir_phi_instr *phi, nir_block *nblk)
+{
+ nir_phi_instr *nphi = nir_phi_instr_create(state->ns);
+
+ __clone_dst(state, &nphi->instr, &nphi->dest, &phi->dest);
+
+ /* Cloning a phi node is a bit different from other instructions. The
+ * sources of phi instructions are the only place where an SSA def can be
+ * used before it is defined. In order to handle this, we just copy over
+ * the sources from the old phi instruction directly and then fix them up
+ * in a second pass once all the instructions in the function have been
+ * properly cloned.
+ *
+ * In order to ensure that the copied sources (which are the same as the
+ * old phi instruction's sources for now) don't get inserted into the old
+ * shader's use-def lists, we have to add the phi instruction *before* we
+ * set up its sources.
+ */
+ nir_instr_insert_after_block(nblk, &nphi->instr);
+
+ foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
+ nir_phi_src *nsrc = ralloc(nphi, nir_phi_src);
+
+ /* Just copy the old source for now. */
+ memcpy(nsrc, src, sizeof(*src));
+
+ /* Since we're not letting nir_instr_insert_after_block handle use/def
+ * updates for us, we have to set the parent_instr manually. It doesn't
+ * really matter when we do it, so we might as well do it here.
+ */
+ nsrc->src.parent_instr = &nphi->instr;
+
+ /* Stash it in the list of phi sources. We'll walk this list and fix up
+ * sources at the very end of clone_function_impl.
+ */
+ list_add(&nsrc->src.use_link, &state->phi_srcs);
+
+ exec_list_push_tail(&nphi->srcs, &nsrc->node);
+ }
+
+ return nphi;
+}
+
+static nir_jump_instr *
+clone_jump(clone_state *state, const nir_jump_instr *jmp)
+{
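+ /* jump instructions (break/continue/return) carry no sources or
+ * destinations, so the jump type is all there is to copy:
+ */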
+ nir_jump_instr *njmp = nir_jump_instr_create(state->ns, jmp->type);
+
+ return njmp;
+}
+
+static nir_call_instr *
+clone_call(clone_state *state, const nir_call_instr *call)
+{
+ nir_function_overload *ncallee = lookup_ptr(state, call->callee);
+ nir_call_instr *ncall = nir_call_instr_create(state->ns, ncallee);
+
+ for (unsigned i = 0; i < ncall->num_params; i++)
+ ncall->params[i] = clone_deref_var(state, call->params[i], &ncall->instr);
+
+ ncall->return_deref = clone_deref_var(state, call->return_deref,
+ &ncall->instr);
+
+ return ncall;
+}
+
+static nir_instr *
+clone_instr(clone_state *state, const nir_instr *instr)
+{
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ return &clone_alu(state, nir_instr_as_alu(instr))->instr;
+ case nir_instr_type_intrinsic:
+ return &clone_intrinsic(state, nir_instr_as_intrinsic(instr))->instr;
+ case nir_instr_type_load_const:
+ return &clone_load_const(state, nir_instr_as_load_const(instr))->instr;
+ case nir_instr_type_ssa_undef:
+ return &clone_ssa_undef(state, nir_instr_as_ssa_undef(instr))->instr;
+ case nir_instr_type_tex:
+ return &clone_tex(state, nir_instr_as_tex(instr))->instr;
+ case nir_instr_type_phi:
+ unreachable("Cannot clone phis with clone_instr");
+ case nir_instr_type_jump:
+ return &clone_jump(state, nir_instr_as_jump(instr))->instr;
+ case nir_instr_type_call:
+ return &clone_call(state, nir_instr_as_call(instr))->instr;
+ case nir_instr_type_parallel_copy:
+ unreachable("Cannot clone parallel copies");
+ default:
+ unreachable("bad instr type");
+ return NULL;
+ }
+}
+
+static nir_block *
+clone_block(clone_state *state, struct exec_list *cf_list, const nir_block *blk)
+{
+ /* Don't actually create a new block. Just use the one from the tail of
+ * the list. NIR guarantees that the tail of the list is a block and that
+ * no two blocks are side-by-side in the IR, so it should be empty.
+ */
+ nir_block *nblk =
+ exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
+ assert(nblk->cf_node.type == nir_cf_node_block);
+ assert(exec_list_is_empty(&nblk->instr_list));
+
+ /* We need this for phi sources */
+ store_ptr(state, nblk, blk);
+
+ nir_foreach_instr(blk, instr) {
+ if (instr->type == nir_instr_type_phi) {
+ /* Phi instructions are a bit of a special case when cloning because
+ * we don't want the act of inserting the instruction to automatically
+ * handle use/defs for us. Instead, we need to wait until all the
+ * blocks/instructions are in place so that we can set their sources up.
+ */
+ clone_phi(state, nir_instr_as_phi(instr), nblk);
+ } else {
+ nir_instr *ninstr = clone_instr(state, instr);
+ nir_instr_insert_after_block(nblk, ninstr);
+ }
+ }
+
+ return nblk;
+}
+
+static void
+clone_cf_list(clone_state *state, struct exec_list *dst,
+ const struct exec_list *list);
+
+static nir_if *
+clone_if(clone_state *state, struct exec_list *cf_list, const nir_if *i)
+{
+ nir_if *ni = nir_if_create(state->ns);
+
+ __clone_src(state, ni, &ni->condition, &i->condition);
+
+ nir_cf_node_insert_end(cf_list, &ni->cf_node);
+
+ clone_cf_list(state, &ni->then_list, &i->then_list);
+ clone_cf_list(state, &ni->else_list, &i->else_list);
+
+ return ni;
+}
+
+static nir_loop *
+clone_loop(clone_state *state, struct exec_list *cf_list, const nir_loop *loop)
+{
+ nir_loop *nloop = nir_loop_create(state->ns);
+
+ nir_cf_node_insert_end(cf_list, &nloop->cf_node);
+
+ clone_cf_list(state, &nloop->body, &loop->body);
+
+ return nloop;
+}
+
+/* clone list of nir_cf_node: */
+static void
+clone_cf_list(clone_state *state, struct exec_list *dst,
+ const struct exec_list *list)
+{
+ foreach_list_typed(nir_cf_node, cf, node, list) {
+ switch (cf->type) {
+ case nir_cf_node_block:
+ clone_block(state, dst, nir_cf_node_as_block(cf));
+ break;
+ case nir_cf_node_if:
+ clone_if(state, dst, nir_cf_node_as_if(cf));
+ break;
+ case nir_cf_node_loop:
+ clone_loop(state, dst, nir_cf_node_as_loop(cf));
+ break;
+ default:
+ unreachable("bad cf type");
+ }
+ }
+}
+
+static nir_function_impl *
+clone_function_impl(clone_state *state, const nir_function_impl *fi,
+ nir_function_overload *nfo)
+{
+ nir_function_impl *nfi = nir_function_impl_create(nfo);
+
+ clone_var_list(state, &nfi->locals, &fi->locals);
+ clone_reg_list(state, &nfi->registers, &fi->registers);
+ nfi->reg_alloc = fi->reg_alloc;
+
+ nfi->num_params = fi->num_params;
+ nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params);
+ for (unsigned i = 0; i < fi->num_params; i++) {
+ nfi->params[i] = lookup_ptr(state, fi->params[i]);
+ }
+ nfi->return_var = lookup_ptr(state, fi->return_var);
+
+ assert(list_empty(&state->phi_srcs));
+
+ clone_cf_list(state, &nfi->body, &fi->body);
+
+ /* After we've cloned almost everything, we have to walk the list of phi
+ * sources and fix them up. Thanks to loops, the block and SSA value for a
+ * phi source may not be defined when we first encounter it. Instead, we
+ * add it to the phi_srcs list and we fix it up here.
+ */
+ list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) {
+ src->pred = lookup_ptr(state, src->pred);
+ assert(src->src.is_ssa);
+ src->src.ssa = lookup_ptr(state, src->src.ssa);
+
+ /* Remove from this list and place in the uses of the SSA def */
+ list_del(&src->src.use_link);
+ list_addtail(&src->src.use_link, &src->src.ssa->uses);
+ }
+ assert(list_empty(&state->phi_srcs));
+
+ /* All metadata is invalidated in the cloning process */
+ nfi->valid_metadata = 0;
+
+ return nfi;
+}
+
+static nir_function_overload *
+clone_function_overload(clone_state *state, const nir_function_overload *fo,
+ nir_function *nfxn)
+{
+ nir_function_overload *nfo = nir_function_overload_create(nfxn);
+
+ /* Needed for call instructions */
+ store_ptr(state, nfo, fo);
+
+ nfo->num_params = fo->num_params;
+ nfo->params = ralloc_array(state->ns, nir_parameter, fo->num_params);
+ memcpy(nfo->params, fo->params, sizeof(nir_parameter) * fo->num_params);
+
+ nfo->return_type = fo->return_type;
+
+ /* At first glance, it looks like we should clone the function_impl here.
+ * However, call instructions need to be able to reference at least the
+ * overload, and those will get processed as we clone the function_impls.
+ * We stop here and do the function_impls in a second pass.
+ */
+
+ return nfo;
+}
+
+static nir_function *
+clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns)
+{
+ assert(ns == state->ns);
+ nir_function *nfxn = nir_function_create(ns, fxn->name);
+
+ foreach_list_typed(nir_function_overload, fo, node, &fxn->overload_list)
+ clone_function_overload(state, fo, nfxn);
+
+ return nfxn;
+}
+
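+/* Clone an entire shader, including its variables, registers, functions,
+ * and function implementations, into a fresh nir_shader allocated under
+ * mem_ctx. A minimal usage sketch (caller context hypothetical):
+ *
+ *    nir_shader *copy = nir_shader_clone(ralloc_context(NULL), shader);
+ *
+ * The cloned IR references none of the original's instructions, so the two
+ * shaders can be modified independently afterwards.
+ */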
+nir_shader *
+nir_shader_clone(void *mem_ctx, const nir_shader *s)
+{
+ clone_state state;
+ init_clone_state(&state);
+
+ nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options);
+ state.ns = ns;
+
+ clone_var_list(&state, &ns->uniforms, &s->uniforms);
+ clone_var_list(&state, &ns->inputs, &s->inputs);
+ clone_var_list(&state, &ns->outputs, &s->outputs);
+ clone_var_list(&state, &ns->globals, &s->globals);
+ clone_var_list(&state, &ns->system_values, &s->system_values);
+
+ /* Go through and clone functions and overloads */
+ foreach_list_typed(nir_function, fxn, node, &s->functions)
+ clone_function(&state, fxn, ns);
+
+ /* Only after all overloads are cloned can we clone the actual function
+ * implementations. This is because nir_call_instrs need to reference the
+ * overloads of other functions and we don't know what order the functions
+ * will have in the list.
+ */
+ nir_foreach_overload(s, fo) {
+ nir_function_overload *nfo = lookup_ptr(&state, fo);
+ clone_function_impl(&state, fo->impl, nfo);
+ }
+
+ clone_reg_list(&state, &ns->registers, &s->registers);
+ ns->reg_alloc = s->reg_alloc;
+
+ ns->info = s->info;
+ ns->info.name = ralloc_strdup(ns, ns->info.name);
+ if (ns->info.label)
+ ns->info.label = ralloc_strdup(ns, ns->info.label);
+
+ ns->num_inputs = s->num_inputs;
+ ns->num_uniforms = s->num_uniforms;
+ ns->num_outputs = s->num_outputs;
+
+ free_clone_state(&state);
+
+ return ns;
+}
diff --git a/src/glsl/nir/nir_constant_expressions.py b/src/glsl/nir/nir_constant_expressions.py
index 2ba8554645d..b16ef503c92 100644
--- a/src/glsl/nir/nir_constant_expressions.py
+++ b/src/glsl/nir/nir_constant_expressions.py
@@ -213,7 +213,7 @@ unpack_half_1x16(uint16_t u)
}
/* Some typed vector structures to make things like src0.y work */
-% for type in ["float", "int", "unsigned", "bool"]:
+% for type in ["float", "int", "uint", "bool"]:
struct ${type}_vec {
${type} x;
${type} y;
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index 0a134aff211..de30db61eea 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -244,6 +244,7 @@ SYSTEM_VALUE(local_invocation_id, 3, 0)
SYSTEM_VALUE(work_group_id, 3, 0)
SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
SYSTEM_VALUE(num_work_groups, 3, 0)
+SYSTEM_VALUE(helper_invocation, 1, 0)
/*
* The format of the indices depends on the type of the load. For uniforms,
diff --git a/src/glsl/nir/nir_lower_clip.c b/src/glsl/nir/nir_lower_clip.c
index 31ccfb2c02b..c58c7785b3f 100644
--- a/src/glsl/nir/nir_lower_clip.c
+++ b/src/glsl/nir/nir_lower_clip.c
@@ -55,9 +55,11 @@ create_clipdist_var(nir_shader *shader, unsigned drvloc,
if (output) {
exec_list_push_tail(&shader->outputs, &var->node);
+ shader->num_outputs++; /* TODO use type_size() */
}
else {
exec_list_push_tail(&shader->inputs, &var->node);
+ shader->num_inputs++; /* TODO use type_size() */
}
return var;
}
diff --git a/src/glsl/nir/nir_lower_idiv.c b/src/glsl/nir/nir_lower_idiv.c
index c961178c53a..f64b3eac8a0 100644
--- a/src/glsl/nir/nir_lower_idiv.c
+++ b/src/glsl/nir/nir_lower_idiv.c
@@ -52,10 +52,8 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu)
bld->cursor = nir_before_instr(&alu->instr);
- numer = nir_ssa_for_src(bld, alu->src[0].src,
- nir_ssa_alu_instr_src_components(alu, 0));
- denom = nir_ssa_for_src(bld, alu->src[1].src,
- nir_ssa_alu_instr_src_components(alu, 1));
+ numer = nir_ssa_for_alu_src(bld, alu, 0);
+ denom = nir_ssa_for_alu_src(bld, alu, 1);
if (is_signed) {
af = nir_i2f(bld, numer);
@@ -96,7 +94,7 @@ convert_instr(nir_builder *bld, nir_alu_instr *alu)
r = nir_imul(bld, q, b);
r = nir_isub(bld, a, r);
- r = nir_ige(bld, r, b);
+ r = nir_uge(bld, r, b);
r = nir_b2i(bld, r);
q = nir_iadd(bld, q, r);
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
index 00a31458310..5683e69d865 100644
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -197,7 +197,7 @@ nir_lower_io_block(nir_block *block, void *void_state)
nir_variable_mode mode = intrin->variables[0]->var->data.mode;
- if (state->mode != -1 && state->mode != mode)
+ if (state->mode != nir_var_all && state->mode != mode)
continue;
if (mode != nir_var_shader_in &&
diff --git a/src/glsl/nir/nir_lower_tex.c b/src/glsl/nir/nir_lower_tex.c
index 8aaa48ab568..93ebf8e78a9 100644
--- a/src/glsl/nir/nir_lower_tex.c
+++ b/src/glsl/nir/nir_lower_tex.c
@@ -41,6 +41,7 @@
typedef struct {
nir_builder b;
const nir_lower_tex_options *options;
+ bool progress;
} lower_tex_state;
static void
@@ -133,6 +134,7 @@ get_texture_size(nir_builder *b, nir_tex_instr *tex)
txs->op = nir_texop_txs;
txs->sampler_dim = GLSL_SAMPLER_DIM_RECT;
txs->sampler_index = tex->sampler_index;
+ txs->dest_type = nir_type_int;
/* only single src, the lod: */
txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0));
@@ -213,6 +215,66 @@ saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
}
}
+static nir_ssa_def *
+get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
+{
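+ /* in the swizzle encoding used by nir_lower_tex_options, values 0-3
+ * select a channel of the texture result while 4 and 5 select the
+ * constants zero and one:
+ */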
+ nir_const_value v;
+
+ memset(&v, 0, sizeof(v));
+
+ if (swizzle_val == 4) {
+ v.u[0] = v.u[1] = v.u[2] = v.u[3] = 0;
+ } else {
+ assert(swizzle_val == 5);
+ if (type == nir_type_float)
+ v.f[0] = v.f[1] = v.f[2] = v.f[3] = 1.0;
+ else
+ v.u[0] = v.u[1] = v.u[2] = v.u[3] = 1;
+ }
+
+ return nir_build_imm(b, 4, v);
+}
+
+static void
+swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
+{
+ assert(tex->dest.is_ssa);
+
+ b->cursor = nir_after_instr(&tex->instr);
+
+ nir_ssa_def *swizzled;
+ if (tex->op == nir_texop_tg4) {
+ if (swizzle[tex->component] < 4) {
+ /* This one's easy */
+ tex->component = swizzle[tex->component];
+ return;
+ } else {
+ swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
+ }
+ } else {
+ assert(nir_tex_instr_dest_size(tex) == 4);
+ if (swizzle[0] < 4 && swizzle[1] < 4 &&
+ swizzle[2] < 4 && swizzle[3] < 4) {
+ unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
+ /* We have no 0's or 1's; just emit a swizzling MOV */
+ swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false);
+ } else {
+ nir_ssa_def *srcs[4];
+ for (unsigned i = 0; i < 4; i++) {
+ if (swizzle[i] < 4) {
+ srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
+ } else {
+ srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
+ }
+ }
+ swizzled = nir_vec(b, srcs, 4);
+ }
+ }
+
+ nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
+ swizzled->parent_instr);
+}
+
static bool
nir_lower_tex_block(nir_block *block, void *void_state)
{
@@ -239,15 +301,28 @@ nir_lower_tex_block(nir_block *block, void *void_state)
/* If we are clamping any coords, we must lower projector first
* as clamping happens *after* projection:
*/
- if (lower_txp || sat_mask)
+ if (lower_txp || sat_mask) {
project_src(b, tex);
+ state->progress = true;
+ }
if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) &&
- state->options->lower_rect)
+ state->options->lower_rect) {
lower_rect(b, tex);
+ state->progress = true;
+ }
- if (sat_mask)
+ if (sat_mask) {
saturate_src(b, tex, sat_mask);
+ state->progress = true;
+ }
+
+ if (((1 << tex->sampler_index) & state->options->swizzle_result) &&
+ !nir_tex_instr_is_query(tex) &&
+ !(tex->is_shadow && tex->is_new_style_shadow)) {
+ swizzle_result(b, tex, state->options->swizzles[tex->sampler_index]);
+ state->progress = true;
+ }
}
return true;
@@ -264,13 +339,17 @@ nir_lower_tex_impl(nir_function_impl *impl, lower_tex_state *state)
nir_metadata_dominance);
}
-void
+bool
nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
{
lower_tex_state state;
state.options = options;
+ state.progress = false;
+
nir_foreach_overload(shader, overload) {
if (overload->impl)
nir_lower_tex_impl(overload->impl, &state);
}
+
+ return state.progress;
}
diff --git a/src/glsl/nir/nir_lower_two_sided_color.c b/src/glsl/nir/nir_lower_two_sided_color.c
index db519bf513b..6995b9d6bc1 100644
--- a/src/glsl/nir/nir_lower_two_sided_color.c
+++ b/src/glsl/nir/nir_lower_two_sided_color.c
@@ -60,6 +60,8 @@ create_input(nir_shader *shader, unsigned drvloc, gl_varying_slot slot)
exec_list_push_tail(&shader->inputs, &var->node);
+ shader->num_inputs++; /* TODO use type_size() */
+
return var;
}
diff --git a/src/glsl/nir/nir_metadata.c b/src/glsl/nir/nir_metadata.c
index 6de981f430f..d5324b35a78 100644
--- a/src/glsl/nir/nir_metadata.c
+++ b/src/glsl/nir/nir_metadata.c
@@ -52,3 +52,39 @@ nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved)
{
impl->valid_metadata &= preserved;
}
+
+#ifdef DEBUG
+/**
+ * Make sure passes properly invalidate metadata (part 1).
+ *
+ * Call this before running a pass to set a bogus metadata flag, which will
+ * only be preserved if the pass forgets to call nir_metadata_preserve().
+ */
+void
+nir_metadata_set_validation_flag(nir_shader *shader)
+{
+ nir_foreach_overload(shader, overload) {
+ if (overload->impl) {
+ overload->impl->valid_metadata |= nir_metadata_not_properly_reset;
+ }
+ }
+}
+
+/**
+ * Make sure passes properly invalidate metadata (part 2).
+ *
+ * Call this after a pass makes progress to verify that the bogus metadata set by
+ * the earlier function was properly thrown away. Note that passes may not call
+ * nir_metadata_preserve() if they don't actually make any changes at all.
+ */
+void
+nir_metadata_check_validation_flag(nir_shader *shader)
+{
+ nir_foreach_overload(shader, overload) {
+ if (overload->impl) {
+ assert(!(overload->impl->valid_metadata &
+ nir_metadata_not_properly_reset));
+ }
+ }
+}
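+
+/* A typical debug harness around an optimization pass might look like this
+ * (pass name hypothetical):
+ *
+ *    nir_metadata_set_validation_flag(shader);
+ *    if (nir_opt_some_pass(shader))
+ *       nir_metadata_check_validation_flag(shader);
+ */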
+#endif
diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
index 3c0f1da94af..37d3dfc4588 100644
--- a/src/glsl/nir/nir_opcodes.py
+++ b/src/glsl/nir/nir_opcodes.py
@@ -91,7 +91,7 @@ class Opcode(object):
tfloat = "float"
tint = "int"
tbool = "bool"
-tunsigned = "unsigned"
+tuint = "uint"
commutative = "commutative "
associative = "associative "
@@ -156,7 +156,7 @@ unop("fsqrt", tfloat, "sqrtf(src0)")
unop("fexp2", tfloat, "exp2f(src0)")
unop("flog2", tfloat, "log2f(src0)")
unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion.
-unop_convert("f2u", tfloat, tunsigned, "src0") # Float-to-unsigned conversion
+unop_convert("f2u", tfloat, tuint, "src0") # Float-to-unsigned conversion
unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion.
# Float-to-boolean conversion
unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
@@ -165,7 +165,7 @@ unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
# Int-to-boolean conversion
unop_convert("i2b", tint, tbool, "src0 != 0")
unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion
-unop_convert("u2f", tunsigned, tfloat, "src0") #Unsigned-to-float conversion.
+unop_convert("u2f", tuint, tfloat, "src0") # Unsigned-to-float conversion.
unop_reduce("bany", 1, tbool, tbool, "{src}", "{src0} || {src1}", "{src}")
unop_reduce("ball", 1, tbool, tbool, "{src}", "{src0} && {src1}", "{src}")
@@ -205,13 +205,13 @@ unop("fddy_coarse", tfloat, "0.0f")
# Floating point pack and unpack operations.
def pack_2x16(fmt):
- unop_horiz("pack_" + fmt + "_2x16", 1, tunsigned, 2, tfloat, """
+ unop_horiz("pack_" + fmt + "_2x16", 1, tuint, 2, tfloat, """
dst.x = (uint32_t) pack_fmt_1x16(src0.x);
dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16;
""".replace("fmt", fmt))
def pack_4x8(fmt):
- unop_horiz("pack_" + fmt + "_4x8", 1, tunsigned, 4, tfloat, """
+ unop_horiz("pack_" + fmt + "_4x8", 1, tuint, 4, tfloat, """
dst.x = (uint32_t) pack_fmt_1x8(src0.x);
dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8;
dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16;
@@ -219,13 +219,13 @@ dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24;
""".replace("fmt", fmt))
def unpack_2x16(fmt):
- unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tunsigned, """
+ unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tuint, """
dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff));
dst.y = unpack_fmt_1x16((uint16_t)(src0.x << 16));
""".replace("fmt", fmt))
def unpack_4x8(fmt):
- unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tunsigned, """
+ unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tuint, """
dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff));
dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff));
dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff));
@@ -248,22 +248,22 @@ unpack_2x16("half")
# Lowered floating point unpacking operations.
-unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tunsigned,
+unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tuint,
"unpack_half_1x16((uint16_t)(src0.x & 0xffff))")
-unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tunsigned,
+unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tuint,
"unpack_half_1x16((uint16_t)(src0.x >> 16))")
# Bit operations, part of ARB_gpu_shader5.
-unop("bitfield_reverse", tunsigned, """
+unop("bitfield_reverse", tuint, """
/* we're not winning any awards for speed here, but that's ok */
dst = 0;
for (unsigned bit = 0; bit < 32; bit++)
dst |= ((src0 >> bit) & 1) << (31 - bit);
""")
-unop("bit_count", tunsigned, """
+unop("bit_count", tuint, """
dst = 0;
for (unsigned bit = 0; bit < 32; bit++) {
if ((src0 >> bit) & 1)
@@ -271,7 +271,7 @@ for (unsigned bit = 0; bit < 32; bit++) {
}
""")
-unop_convert("ufind_msb", tunsigned, tint, """
+unop_convert("ufind_msb", tuint, tint, """
dst = -1;
for (int bit = 31; bit > 0; bit--) {
if ((src0 >> bit) & 1) {
@@ -358,25 +358,25 @@ binop("imul", tint, commutative + associative, "src0 * src1")
binop("imul_high", tint, commutative,
"(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)")
# high 32-bits of unsigned integer multiply
-binop("umul_high", tunsigned, commutative,
+binop("umul_high", tuint, commutative,
"(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")
binop("fdiv", tfloat, "", "src0 / src1")
binop("idiv", tint, "", "src0 / src1")
-binop("udiv", tunsigned, "", "src0 / src1")
+binop("udiv", tuint, "", "src0 / src1")
# returns a boolean representing the carry resulting from the addition of
# the two unsigned arguments.
-binop_convert("uadd_carry", tbool, tunsigned, commutative, "src0 + src1 < src0")
+binop_convert("uadd_carry", tbool, tuint, commutative, "src0 + src1 < src0")
# returns a boolean representing the borrow resulting from the subtraction
# of the two unsigned arguments.
-binop_convert("usub_borrow", tbool, tunsigned, "", "src1 < src0")
+binop_convert("usub_borrow", tbool, tuint, "", "src1 < src0")
binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)")
-binop("umod", tunsigned, "", "src1 == 0 ? 0 : src0 % src1")
+binop("umod", tuint, "", "src1 == 0 ? 0 : src0 % src1")
#
# Comparisons
@@ -393,8 +393,8 @@ binop_compare("ilt", tint, "", "src0 < src1")
binop_compare("ige", tint, "", "src0 >= src1")
binop_compare("ieq", tint, commutative, "src0 == src1")
binop_compare("ine", tint, commutative, "src0 != src1")
-binop_compare("ult", tunsigned, "", "src0 < src1")
-binop_compare("uge", tunsigned, "", "src0 >= src1")
+binop_compare("ult", tuint, "", "src0 < src1")
+binop_compare("uge", tuint, "", "src0 >= src1")
# integer-aware GLSL-style comparisons that compare floats and ints
@@ -425,7 +425,7 @@ binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not E
binop("ishl", tint, "", "src0 << src1")
binop("ishr", tint, "", "src0 >> src1")
-binop("ushr", tunsigned, "", "src0 >> src1")
+binop("ushr", tuint, "", "src0 >> src1")
# bitwise logic operators
#
@@ -433,9 +433,9 @@ binop("ushr", tunsigned, "", "src0 >> src1")
# integers.
-binop("iand", tunsigned, commutative + associative, "src0 & src1")
-binop("ior", tunsigned, commutative + associative, "src0 | src1")
-binop("ixor", tunsigned, commutative + associative, "src0 ^ src1")
+binop("iand", tuint, commutative + associative, "src0 & src1")
+binop("ior", tuint, commutative + associative, "src0 | src1")
+binop("ixor", tuint, commutative + associative, "src0 ^ src1")
# floating point logic operators
@@ -463,10 +463,10 @@ opcode("fdph_replicated", 4, tfloat, [3, 4], [tfloat, tfloat], "",
binop("fmin", tfloat, "", "fminf(src0, src1)")
binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1")
-binop("umin", tunsigned, commutative + associative, "src1 > src0 ? src0 : src1")
+binop("umin", tuint, commutative + associative, "src1 > src0 ? src0 : src1")
binop("fmax", tfloat, "", "fmaxf(src0, src1)")
binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
-binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0")
+binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0")
# Saturated vector add for 4 8bit ints.
binop("usadd_4x8", tint, commutative + associative, """
@@ -515,10 +515,10 @@ for (int i = 0; i < 32; i += 8) {
binop("fpow", tfloat, "", "powf(src0, src1)")
-binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat,
+binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat,
"pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
-binop_convert("bfm", tunsigned, tint, "", """
+binop_convert("bfm", tuint, tint, "", """
int offset = src0, bits = src1;
if (offset < 0 || bits < 0 || offset + bits > 32)
dst = 0; /* undefined per the spec */
@@ -535,7 +535,7 @@ if (!isnormal(dst))
# Combines the first component of each input to make a 2-component vector.
-binop_horiz("vec2", 2, tunsigned, 1, tunsigned, 1, tunsigned, """
+binop_horiz("vec2", 2, tuint, 1, tuint, 1, tuint, """
dst.x = src0.x;
dst.y = src1.x;
""")
@@ -543,9 +543,9 @@ dst.y = src1.x;
def triop(name, ty, const_expr):
opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr)
def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr):
- opcode(name, output_size, tunsigned,
+ opcode(name, output_size, tuint,
[src1_size, src2_size, src3_size],
- [tunsigned, tunsigned, tunsigned], "", const_expr)
+ [tuint, tuint, tuint], "", const_expr)
triop("ffma", tfloat, "src0 * src1 + src2")
@@ -559,11 +559,11 @@ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2")
-opcode("bcsel", 0, tunsigned, [0, 0, 0],
- [tbool, tunsigned, tunsigned], "", "src0 ? src1 : src2")
+opcode("bcsel", 0, tuint, [0, 0, 0],
+ [tbool, tuint, tuint], "", "src0 ? src1 : src2")
-triop("bfi", tunsigned, """
-unsigned mask = src0, insert = src1 & mask, base = src2;
+triop("bfi", tuint, """
+unsigned mask = src0, insert = src1, base = src2;
if (mask == 0) {
dst = base;
} else {
@@ -572,12 +572,12 @@ if (mask == 0) {
tmp >>= 1;
insert <<= 1;
}
- dst = (base & ~mask) | insert;
+ dst = (base & ~mask) | (insert & mask);
}
""")
-opcode("ubitfield_extract", 0, tunsigned,
- [0, 1, 1], [tunsigned, tint, tint], "", """
+opcode("ubitfield_extract", 0, tuint,
+ [0, 1, 1], [tuint, tint, tint], "", """
unsigned base = src0;
int offset = src1.x, bits = src2.x;
if (bits == 0) {
@@ -611,13 +611,13 @@ dst.z = src2.x;
def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
src4_size, const_expr):
- opcode(name, output_size, tunsigned,
+ opcode(name, output_size, tuint,
[src1_size, src2_size, src3_size, src4_size],
- [tunsigned, tunsigned, tunsigned, tunsigned],
+ [tuint, tuint, tuint, tuint],
"", const_expr)
-opcode("bitfield_insert", 0, tunsigned, [0, 0, 1, 1],
- [tunsigned, tunsigned, tint, tint], "", """
+opcode("bitfield_insert", 0, tuint, [0, 0, 1, 1],
+ [tuint, tuint, tint, tint], "", """
unsigned base = src0, insert = src1;
int offset = src2.x, bits = src3.x;
if (bits == 0) {
diff --git a/src/glsl/nir/nir_opt_copy_propagate.c b/src/glsl/nir/nir_opt_copy_propagate.c
index 7d8bdd7f2ca..cfc8e331128 100644
--- a/src/glsl/nir/nir_opt_copy_propagate.c
+++ b/src/glsl/nir/nir_opt_copy_propagate.c
@@ -55,10 +55,15 @@ static bool is_move(nir_alu_instr *instr)
static bool is_vec(nir_alu_instr *instr)
{
- for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
if (!instr->src[i].src.is_ssa)
return false;
+ /* we handle modifiers in a separate pass */
+ if (instr->src[i].abs || instr->src[i].negate)
+ return false;
+ }
+
return instr->op == nir_op_vec2 ||
instr->op == nir_op_vec3 ||
instr->op == nir_op_vec4;
diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index 2db209d434d..76bfc47c2a0 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -512,7 +512,9 @@ print_tex_instr(nir_tex_instr *instr, print_state *state)
case nir_texop_texture_samples:
fprintf(fp, "texture_samples ");
break;
-
+ case nir_texop_samples_identical:
+ fprintf(fp, "samples_identical ");
+ break;
default:
unreachable("Invalid texture operation");
break;
@@ -985,6 +987,16 @@ nir_print_shader(nir_shader *shader, FILE *fp)
fprintf(fp, "shader: %s\n", gl_shader_stage_name(shader->stage));
+ if (shader->info.name)
+ fprintf(fp, "name: %s\n", shader->info.name);
+
+ if (shader->info.label)
+ fprintf(fp, "label: %s\n", shader->info.label);
+
+ fprintf(fp, "inputs: %u\n", shader->num_inputs);
+ fprintf(fp, "outputs: %u\n", shader->num_outputs);
+ fprintf(fp, "uniforms: %u\n", shader->num_uniforms);
+
nir_foreach_variable(var, &shader->uniforms) {
print_var_decl(var, &state);
}
diff --git a/src/glsl/nir/nir_search.c b/src/glsl/nir/nir_search.c
index bb154407914..56d7e8162f3 100644
--- a/src/glsl/nir/nir_search.c
+++ b/src/glsl/nir/nir_search.c
@@ -166,7 +166,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
}
return true;
case nir_type_int:
- case nir_type_unsigned:
+ case nir_type_uint:
case nir_type_bool:
for (unsigned i = 0; i < num_components; ++i) {
if (load->value.i[new_swizzle[i]] != const_val->data.i)
@@ -310,7 +310,7 @@ construct_value(const nir_search_value *value, nir_alu_type type,
load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i);
load->value.i[0] = c->data.i;
break;
- case nir_type_unsigned:
+ case nir_type_uint:
case nir_type_bool:
load->value.u[0] = c->data.u;
break;
diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c
index ed374b921fa..06879d64ee2 100644
--- a/src/glsl/nir/nir_validate.c
+++ b/src/glsl/nir/nir_validate.c
@@ -290,11 +290,11 @@ validate_alu_instr(nir_alu_instr *instr, validate_state *state)
{
assert(instr->op < nir_num_opcodes);
- validate_alu_dest(&instr->dest, state);
-
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
validate_alu_src(instr, i, state);
}
+
+ validate_alu_dest(&instr->dest, state);
}
static void
@@ -375,6 +375,11 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
validate_src(&instr->src[i], state);
}
+ unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
+ for (unsigned i = 0; i < num_vars; i++) {
+ validate_deref_var(instr, instr->variables[i], state);
+ }
+
if (nir_intrinsic_infos[instr->intrinsic].has_dest) {
unsigned components_written =
nir_intrinsic_infos[instr->intrinsic].dest_components;
@@ -392,11 +397,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
validate_dest(&instr->dest, state);
}
- unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables;
- for (unsigned i = 0; i < num_vars; i++) {
- validate_deref_var(instr, instr->variables[i], state);
- }
-
switch (instr->intrinsic) {
case nir_intrinsic_load_var: {
const struct glsl_type *type =
@@ -434,8 +434,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
static void
validate_tex_instr(nir_tex_instr *instr, validate_state *state)
{
- validate_dest(&instr->dest, state);
-
bool src_type_seen[nir_num_tex_src_types];
for (unsigned i = 0; i < nir_num_tex_src_types; i++)
src_type_seen[i] = false;
@@ -448,6 +446,8 @@ validate_tex_instr(nir_tex_instr *instr, validate_state *state)
if (instr->sampler != NULL)
validate_deref_var(instr, instr->sampler, state);
+
+ validate_dest(&instr->dest, state);
}
static void
diff --git a/src/glsl/nir/spirv_to_nir.c b/src/glsl/nir/spirv_to_nir.c
index 70610ca0f66..86282d25e0a 100644
--- a/src/glsl/nir/spirv_to_nir.c
+++ b/src/glsl/nir/spirv_to_nir.c
@@ -2026,7 +2026,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
switch (glsl_get_sampler_result_type(sampler_type)) {
case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break;
case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break;
- case GLSL_TYPE_UINT: instr->dest_type = nir_type_unsigned; break;
+ case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; break;
case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break;
default:
unreachable("Invalid base type for sampler result");
diff --git a/src/glsl/opt_tree_grafting.cpp b/src/glsl/opt_tree_grafting.cpp
index e38a0e93058..cd58213c019 100644
--- a/src/glsl/opt_tree_grafting.cpp
+++ b/src/glsl/opt_tree_grafting.cpp
@@ -275,6 +275,7 @@ ir_tree_grafting_visitor::visit_enter(ir_texture *ir)
case ir_lod:
case ir_query_levels:
case ir_texture_samples:
+ case ir_samples_identical:
break;
case ir_txb:
if (do_graft(&ir->lod_info.bias))