author     Jason Ekstrand <[email protected]>   2015-11-03 15:45:04 -0800
committer  Jason Ekstrand <[email protected]>   2015-11-03 15:45:04 -0800
commit     b00e3f221b3f6dd0e87697c53331fd033b6e8676 (patch)
tree       a59dfeca8fd404c65da59a663e0abda301e893a2 /src/glsl
parent     a1e7b8701a4687f29b013364a852aa773c80f960 (diff)
parent     5d4b019d2a6d4deb4db11780618515cf1fa8a4fc (diff)
Merge remote-tracking branch 'mesa-public/master' into vulkan
Diffstat (limited to 'src/glsl')
-rw-r--r--  src/glsl/ast_to_hir.cpp                 |  70
-rw-r--r--  src/glsl/builtin_functions.cpp          |  43
-rw-r--r--  src/glsl/builtin_variables.cpp          |  39
-rw-r--r--  src/glsl/glcpp/glcpp-parse.y            |   3
-rw-r--r--  src/glsl/glsl_parser.yy                 |   6
-rw-r--r--  src/glsl/glsl_parser_extras.cpp         |   1
-rw-r--r--  src/glsl/glsl_parser_extras.h           |   2
-rw-r--r--  src/glsl/link_atomics.cpp               |  43
-rw-r--r--  src/glsl/link_uniform_blocks.cpp        |  22
-rw-r--r--  src/glsl/link_uniforms.cpp              |  77
-rw-r--r--  src/glsl/linker.cpp                     | 268
-rw-r--r--  src/glsl/nir/glsl_to_nir.cpp            |   9
-rw-r--r--  src/glsl/nir/nir.c                      |  17
-rw-r--r--  src/glsl/nir/nir.h                      |   5
-rw-r--r--  src/glsl/nir/nir_intrinsics.h           |  12
-rw-r--r--  src/glsl/nir/nir_lower_atomics.c        |  25
-rw-r--r--  src/glsl/nir/nir_opcodes.py             |  45
-rw-r--r--  src/glsl/nir/nir_opt_algebraic.py       |   6
-rw-r--r--  src/glsl/nir/nir_print.c                |   5
-rw-r--r--  src/glsl/opt_dead_builtin_varyings.cpp  |  42
20 files changed, 493 insertions, 247 deletions
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
index 961183636a9..0a79fb14633 100644
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2423,21 +2423,6 @@ validate_explicit_location(const struct ast_type_qualifier *qual,
const struct gl_context *const ctx = state->ctx;
unsigned max_loc = qual->location + var->type->uniform_locations() - 1;
- /* ARB_explicit_uniform_location specification states:
- *
- * "The explicitly defined locations and the generated locations
- * must be in the range of 0 to MAX_UNIFORM_LOCATIONS minus one."
- *
- * "Valid locations for default-block uniform variable locations
- * are in the range of 0 to the implementation-defined maximum
- * number of uniform locations."
- */
- if (qual->location < 0) {
- _mesa_glsl_error(loc, state,
- "explicit location < 0 for uniform %s", var->name);
- return;
- }
-
if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) {
_mesa_glsl_error(loc, state, "location(s) consumed by uniform %s "
">= MAX_UNIFORM_LOCATIONS (%u)", var->name,
@@ -2528,41 +2513,30 @@ validate_explicit_location(const struct ast_type_qualifier *qual,
} else {
var->data.explicit_location = true;
- /* This bit of silliness is needed because invalid explicit locations
- * are supposed to be flagged during linking. Small negative values
- * biased by VERT_ATTRIB_GENERIC0 or FRAG_RESULT_DATA0 could alias
- * built-in values (e.g., -16+VERT_ATTRIB_GENERIC0 = VERT_ATTRIB_POS).
- * The linker needs to be able to differentiate these cases. This
- * ensures that negative values stay negative.
- */
- if (qual->location >= 0) {
- switch (state->stage) {
- case MESA_SHADER_VERTEX:
- var->data.location = (var->data.mode == ir_var_shader_in)
- ? (qual->location + VERT_ATTRIB_GENERIC0)
- : (qual->location + VARYING_SLOT_VAR0);
- break;
+ switch (state->stage) {
+ case MESA_SHADER_VERTEX:
+ var->data.location = (var->data.mode == ir_var_shader_in)
+ ? (qual->location + VERT_ATTRIB_GENERIC0)
+ : (qual->location + VARYING_SLOT_VAR0);
+ break;
- case MESA_SHADER_TESS_CTRL:
- case MESA_SHADER_TESS_EVAL:
- case MESA_SHADER_GEOMETRY:
- if (var->data.patch)
- var->data.location = qual->location + VARYING_SLOT_PATCH0;
- else
- var->data.location = qual->location + VARYING_SLOT_VAR0;
- break;
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
+ case MESA_SHADER_GEOMETRY:
+ if (var->data.patch)
+ var->data.location = qual->location + VARYING_SLOT_PATCH0;
+ else
+ var->data.location = qual->location + VARYING_SLOT_VAR0;
+ break;
- case MESA_SHADER_FRAGMENT:
- var->data.location = (var->data.mode == ir_var_shader_out)
- ? (qual->location + FRAG_RESULT_DATA0)
- : (qual->location + VARYING_SLOT_VAR0);
- break;
- case MESA_SHADER_COMPUTE:
- assert(!"Unexpected shader type");
- break;
- }
- } else {
- var->data.location = qual->location;
+ case MESA_SHADER_FRAGMENT:
+ var->data.location = (var->data.mode == ir_var_shader_out)
+ ? (qual->location + FRAG_RESULT_DATA0)
+ : (qual->location + VARYING_SLOT_VAR0);
+ break;
+ case MESA_SHADER_COMPUTE:
+ assert(!"Unexpected shader type");
+ break;
}
if (qual->flags.q.explicit_index) {
diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index aae25f893e8..509a57b8813 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -401,6 +401,12 @@ shader_atomic_counters(const _mesa_glsl_parse_state *state)
}
static bool
+shader_clock(const _mesa_glsl_parse_state *state)
+{
+ return state->ARB_shader_clock_enable;
+}
+
+static bool
shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
{
return state->has_shader_storage_buffer_objects();
@@ -782,6 +788,11 @@ private:
ir_function_signature *_memory_barrier(
builtin_available_predicate avail);
+ ir_function_signature *_shader_clock_intrinsic(builtin_available_predicate avail,
+ const glsl_type *type);
+ ir_function_signature *_shader_clock(builtin_available_predicate avail,
+ const glsl_type *type);
+
#undef B0
#undef B1
#undef B2
@@ -952,6 +963,11 @@ builtin_builder::create_intrinsics()
add_function("__intrinsic_memory_barrier",
_memory_barrier_intrinsic(shader_image_load_store),
NULL);
+
+ add_function("__intrinsic_shader_clock",
+ _shader_clock_intrinsic(shader_clock,
+ glsl_type::uvec2_type),
+ NULL);
}
/**
@@ -2741,6 +2757,11 @@ builtin_builder::create_builtins()
_memory_barrier(shader_image_load_store),
NULL);
+ add_function("clock2x32ARB",
+ _shader_clock(shader_clock,
+ glsl_type::uvec2_type),
+ NULL);
+
#undef F
#undef FI
#undef FIUD
@@ -5251,6 +5272,28 @@ builtin_builder::_memory_barrier(builtin_available_predicate avail)
return sig;
}
+ir_function_signature *
+builtin_builder::_shader_clock_intrinsic(builtin_available_predicate avail,
+ const glsl_type *type)
+{
+ MAKE_INTRINSIC(type, avail, 0);
+ return sig;
+}
+
+ir_function_signature *
+builtin_builder::_shader_clock(builtin_available_predicate avail,
+ const glsl_type *type)
+{
+ MAKE_SIG(type, avail, 0);
+
+ ir_variable *retval = body.make_temp(type, "clock_retval");
+
+ body.emit(call(shader->symbols->get_function("__intrinsic_shader_clock"),
+ retval, sig->parameters));
+ body.emit(ret(retval));
+ return sig;
+}
+
/** @} */
/******************************************************************************/
diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp
index a6ad1050552..c30fb9226e5 100644
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -710,7 +710,7 @@ builtin_variable_generator::generate_constants()
}
}
- if (state->is_version(430, 0) || state->ARB_compute_shader_enable) {
+ if (state->is_version(430, 310) || state->ARB_compute_shader_enable) {
add_const("gl_MaxComputeAtomicCounterBuffers", MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS);
add_const("gl_MaxComputeAtomicCounters", MAX_COMPUTE_ATOMIC_COUNTERS);
add_const("gl_MaxComputeImageUniforms", MAX_COMPUTE_IMAGE_UNIFORMS);
@@ -887,16 +887,22 @@ builtin_variable_generator::generate_uniforms()
void
builtin_variable_generator::generate_vs_special_vars()
{
+ ir_variable *var;
+
if (state->is_version(130, 300))
add_system_value(SYSTEM_VALUE_VERTEX_ID, int_t, "gl_VertexID");
if (state->ARB_draw_instanced_enable)
add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceIDARB");
if (state->ARB_draw_instanced_enable || state->is_version(140, 300))
add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceID");
- if (state->AMD_vertex_shader_layer_enable)
- add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer");
- if (state->AMD_vertex_shader_viewport_index_enable)
- add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
+ if (state->AMD_vertex_shader_layer_enable) {
+ var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer");
+ var->data.interpolation = INTERP_QUALIFIER_FLAT;
+ }
+ if (state->AMD_vertex_shader_viewport_index_enable) {
+ var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
+ var->data.interpolation = INTERP_QUALIFIER_FLAT;
+ }
if (compatibility) {
add_input(VERT_ATTRIB_POS, vec4_t, "gl_Vertex");
add_input(VERT_ATTRIB_NORMAL, vec3_t, "gl_Normal");
@@ -954,9 +960,14 @@ builtin_variable_generator::generate_tes_special_vars()
void
builtin_variable_generator::generate_gs_special_vars()
{
- add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer");
- if (state->is_version(410, 0) || state->ARB_viewport_array_enable)
- add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
+ ir_variable *var;
+
+ var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer");
+ var->data.interpolation = INTERP_QUALIFIER_FLAT;
+ if (state->is_version(410, 0) || state->ARB_viewport_array_enable) {
+ var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
+ var->data.interpolation = INTERP_QUALIFIER_FLAT;
+ }
if (state->is_version(400, 0) || state->ARB_gpu_shader5_enable)
add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, "gl_InvocationID");
@@ -970,7 +981,6 @@ builtin_variable_generator::generate_gs_special_vars()
* the specific case of gl_PrimitiveIDIn. So we don't need to treat
* gl_PrimitiveIDIn as an {ARB,EXT}_geometry_shader4-only variable.
*/
- ir_variable *var;
var = add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveIDIn");
var->data.interpolation = INTERP_QUALIFIER_FLAT;
var = add_output(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID");
@@ -984,14 +994,15 @@ builtin_variable_generator::generate_gs_special_vars()
void
builtin_variable_generator::generate_fs_special_vars()
{
+ ir_variable *var;
+
add_input(VARYING_SLOT_POS, vec4_t, "gl_FragCoord");
add_input(VARYING_SLOT_FACE, bool_t, "gl_FrontFacing");
if (state->is_version(120, 100))
add_input(VARYING_SLOT_PNTC, vec2_t, "gl_PointCoord");
if (state->is_version(150, 0)) {
- ir_variable *var =
- add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID");
+ var = add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID");
var->data.interpolation = INTERP_QUALIFIER_FLAT;
}
@@ -1043,8 +1054,10 @@ builtin_variable_generator::generate_fs_special_vars()
}
if (state->is_version(430, 0) || state->ARB_fragment_layer_viewport_enable) {
- add_input(VARYING_SLOT_LAYER, int_t, "gl_Layer");
- add_input(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
+ var = add_input(VARYING_SLOT_LAYER, int_t, "gl_Layer");
+ var->data.interpolation = INTERP_QUALIFIER_FLAT;
+ var = add_input(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex");
+ var->data.interpolation = INTERP_QUALIFIER_FLAT;
}
}
diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
index 1d7a3af8b74..4acccf74065 100644
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -2426,6 +2426,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
if (extensions->ARB_shader_bit_encoding)
add_builtin_define(parser, "GL_ARB_shader_bit_encoding", 1);
+ if (extensions->ARB_shader_clock)
+ add_builtin_define(parser, "GL_ARB_shader_clock", 1);
+
if (extensions->ARB_uniform_buffer_object)
add_builtin_define(parser, "GL_ARB_uniform_buffer_object", 1);
diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 2f2e10d7992..4636435f191 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -948,7 +948,8 @@ parameter_qualifier:
if ($2.precision != ast_precision_none)
_mesa_glsl_error(&@1, state, "duplicate precision qualifier");
- if (!state->has_420pack() && $2.flags.i != 0)
+ if (!(state->has_420pack() || state->is_version(420, 310)) &&
+ $2.flags.i != 0)
_mesa_glsl_error(&@1, state, "precision qualifiers must come last");
$$ = $2;
@@ -1847,7 +1848,8 @@ type_qualifier:
if ($2.precision != ast_precision_none)
_mesa_glsl_error(&@1, state, "duplicate precision qualifier");
- if (!state->has_420pack() && $2.flags.i != 0)
+ if (!(state->has_420pack() || state->is_version(420, 310)) &&
+ $2.flags.i != 0)
_mesa_glsl_error(&@1, state, "precision qualifiers must come last");
$$ = $2;
diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 692b1228ee9..f856a200e09 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -606,6 +606,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
EXT(ARB_separate_shader_objects, true, false, dummy_true),
EXT(ARB_shader_atomic_counters, true, false, ARB_shader_atomic_counters),
EXT(ARB_shader_bit_encoding, true, false, ARB_shader_bit_encoding),
+ EXT(ARB_shader_clock, true, false, ARB_shader_clock),
EXT(ARB_shader_image_load_store, true, false, ARB_shader_image_load_store),
EXT(ARB_shader_image_size, true, false, ARB_shader_image_size),
EXT(ARB_shader_precision, true, false, ARB_shader_precision),
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index e8740f9ecb9..b54c5359149 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -519,6 +519,8 @@ struct _mesa_glsl_parse_state {
bool ARB_shader_atomic_counters_warn;
bool ARB_shader_bit_encoding_enable;
bool ARB_shader_bit_encoding_warn;
+ bool ARB_shader_clock_enable;
+ bool ARB_shader_clock_warn;
bool ARB_shader_image_load_store_enable;
bool ARB_shader_image_load_store_warn;
bool ARB_shader_image_size_enable;
diff --git a/src/glsl/link_atomics.cpp b/src/glsl/link_atomics.cpp
index 70ef0e1c891..cdcc06d53e2 100644
--- a/src/glsl/link_atomics.cpp
+++ b/src/glsl/link_atomics.cpp
@@ -198,6 +198,7 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
struct gl_shader_program *prog)
{
unsigned num_buffers;
+ unsigned num_atomic_buffers[MESA_SHADER_STAGES] = {};
active_atomic_buffer *abs =
find_active_atomic_counters(ctx, prog, &num_buffers);
@@ -242,13 +243,49 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
}
/* Assign stage-specific fields. */
- for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j)
- mab.StageReferences[j] =
- (ab.stage_references[j] ? GL_TRUE : GL_FALSE);
+ for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) {
+ if (ab.stage_references[j]) {
+ mab.StageReferences[j] = GL_TRUE;
+ num_atomic_buffers[j]++;
+ } else {
+ mab.StageReferences[j] = GL_FALSE;
+ }
+ }
i++;
}
+ /* Store a list of pointers to atomic buffers per stage and store the index
+ * into the intra-stage buffer list in uniform storage.
+ */
+ for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) {
+ if (prog->_LinkedShaders[j] && num_atomic_buffers[j] > 0) {
+ prog->_LinkedShaders[j]->NumAtomicBuffers = num_atomic_buffers[j];
+ prog->_LinkedShaders[j]->AtomicBuffers =
+ rzalloc_array(prog, gl_active_atomic_buffer *,
+ num_atomic_buffers[j]);
+
+ unsigned intra_stage_idx = 0;
+ for (unsigned i = 0; i < num_buffers; i++) {
+ struct gl_active_atomic_buffer *atomic_buffer =
+ &prog->AtomicBuffers[i];
+ if (atomic_buffer->StageReferences[j]) {
+ prog->_LinkedShaders[j]->AtomicBuffers[intra_stage_idx] =
+ atomic_buffer;
+
+ for (unsigned u = 0; u < atomic_buffer->NumUniforms; u++) {
+ prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].index =
+ intra_stage_idx;
+ prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].active =
+ true;
+ }
+
+ intra_stage_idx++;
+ }
+ }
+ }
+ }
+
delete [] abs;
assert(i == num_buffers);
}
diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp
index 5285d8d01e4..d5d30bb0a0d 100644
--- a/src/glsl/link_uniform_blocks.cpp
+++ b/src/glsl/link_uniform_blocks.cpp
@@ -100,7 +100,7 @@ private:
virtual void visit_field(const glsl_type *type, const char *name,
bool row_major, const glsl_type *,
const unsigned packing,
- bool /* last_field */)
+ bool last_field)
{
assert(this->index < this->num_variables);
@@ -131,12 +131,28 @@ private:
unsigned alignment = 0;
unsigned size = 0;
+ /* From ARB_program_interface_query:
+ *
+ * "If the final member of an active shader storage block is array
+ * with no declared size, the minimum buffer size is computed
+ * assuming the array was declared as an array with one element."
+ *
+ * For that reason, we use the base type of the unsized array to calculate
+ * its size. We don't need to check if the unsized array is the last member
+ * of a shader storage block (that check was already done by the parser).
+ */
+ const glsl_type *type_for_size = type;
+ if (type->is_unsized_array()) {
+ assert(last_field);
+ type_for_size = type->without_array();
+ }
+
if (packing == GLSL_INTERFACE_PACKING_STD430) {
alignment = type->std430_base_alignment(v->RowMajor);
- size = type->std430_size(v->RowMajor);
+ size = type_for_size->std430_size(v->RowMajor);
} else {
alignment = type->std140_base_alignment(v->RowMajor);
- size = type->std140_size(v->RowMajor);
+ size = type_for_size->std140_size(v->RowMajor);
}
this->offset = glsl_align(this->offset, alignment);
diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index 8183e65d2f5..47bb7717f84 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -1010,38 +1010,37 @@ link_update_uniform_buffer_variables(struct gl_shader *shader)
}
}
-/**
- * Scan the program for image uniforms and store image unit access
- * information into the gl_shader data structure.
- */
static void
-link_set_image_access_qualifiers(struct gl_shader_program *prog)
+link_set_image_access_qualifiers(struct gl_shader_program *prog,
+ gl_shader *sh, unsigned shader_stage,
+ ir_variable *var, const glsl_type *type,
+ char **name, size_t name_length)
{
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- gl_shader *sh = prog->_LinkedShaders[i];
-
- if (sh == NULL)
- continue;
+ /* Handle arrays of arrays */
+ if (type->is_array() && type->fields.array->is_array()) {
+ for (unsigned i = 0; i < type->length; i++) {
+ size_t new_length = name_length;
- foreach_in_list(ir_instruction, node, sh->ir) {
- ir_variable *var = node->as_variable();
+ /* Append the subscript to the current variable name */
+ ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
- if (var && var->data.mode == ir_var_uniform &&
- var->type->contains_image()) {
- unsigned id = 0;
- bool found = prog->UniformHash->get(id, var->name);
- assert(found);
- (void) found;
- const gl_uniform_storage *storage = &prog->UniformStorage[id];
- const unsigned index = storage->opaque[i].index;
- const GLenum access = (var->data.image_read_only ? GL_READ_ONLY :
- var->data.image_write_only ? GL_WRITE_ONLY :
- GL_READ_WRITE);
-
- for (unsigned j = 0; j < MAX2(1, storage->array_elements); ++j)
- sh->ImageAccess[index + j] = access;
- }
+ link_set_image_access_qualifiers(prog, sh, shader_stage, var,
+ type->fields.array, name,
+ new_length);
}
+ } else {
+ unsigned id = 0;
+ bool found = prog->UniformHash->get(id, *name);
+ assert(found);
+ (void) found;
+ const gl_uniform_storage *storage = &prog->UniformStorage[id];
+ const unsigned index = storage->opaque[shader_stage].index;
+ const GLenum access = (var->data.image_read_only ? GL_READ_ONLY :
+ var->data.image_write_only ? GL_WRITE_ONLY :
+ GL_READ_WRITE);
+
+ for (unsigned j = 0; j < MAX2(1, storage->array_elements); ++j)
+ sh->ImageAccess[index + j] = access;
}
}
@@ -1305,7 +1304,29 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
prog->NumHiddenUniforms = hidden_uniforms;
prog->UniformStorage = uniforms;
- link_set_image_access_qualifiers(prog);
+ /**
+ * Scan the program for image uniforms and store image unit access
+ * information into the gl_shader data structure.
+ */
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ gl_shader *sh = prog->_LinkedShaders[i];
+
+ if (sh == NULL)
+ continue;
+
+ foreach_in_list(ir_instruction, node, sh->ir) {
+ ir_variable *var = node->as_variable();
+
+ if (var && var->data.mode == ir_var_uniform &&
+ var->type->contains_image()) {
+ char *name_copy = ralloc_strdup(NULL, var->name);
+ link_set_image_access_qualifiers(prog, sh, i, var, var->type,
+ &name_copy, strlen(var->name));
+ ralloc_free(name_copy);
+ }
+ }
+ }
+
link_set_uniform_initializers(prog, boolean_true);
return;
diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 07ea0e0c7e5..c35d87acea6 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2282,6 +2282,22 @@ resize_tes_inputs(struct gl_context *ctx,
foreach_in_list(ir_instruction, ir, tes->ir) {
ir->accept(&input_resize_visitor);
}
+
+ if (tcs) {
+ /* Convert the gl_PatchVerticesIn system value into a constant, since
+ * the value is known at this point.
+ */
+ foreach_in_list(ir_instruction, ir, tes->ir) {
+ ir_variable *var = ir->as_variable();
+ if (var && var->data.mode == ir_var_system_value &&
+ var->data.location == SYSTEM_VALUE_VERTICES_IN) {
+ void *mem_ctx = ralloc_parent(var);
+ var->data.mode = ir_var_auto;
+ var->data.location = 0;
+ var->constant_value = new(mem_ctx) ir_constant(num_vertices);
+ }
+ }
+ }
}
/**
@@ -3137,7 +3153,8 @@ should_add_buffer_variable(struct gl_shader_program *shProg,
GLenum type, const char *name)
{
bool found_interface = false;
- const char *block_name = NULL;
+ unsigned block_name_len = 0;
+ const char *block_name_dot = strchr(name, '.');
/* These rules only apply to buffer variables. So we return
* true for the rest of types.
@@ -3146,8 +3163,28 @@ should_add_buffer_variable(struct gl_shader_program *shProg,
return true;
for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
- block_name = shProg->BufferInterfaceBlocks[i].Name;
- if (strncmp(block_name, name, strlen(block_name)) == 0) {
+ const char *block_name = shProg->BufferInterfaceBlocks[i].Name;
+ block_name_len = strlen(block_name);
+
+ const char *block_square_bracket = strchr(block_name, '[');
+ if (block_square_bracket) {
+ /* The block is part of an array of named interfaces, so
+ * for the name comparison we ignore the "[x]" part.
+ */
+ block_name_len -= strlen(block_square_bracket);
+ }
+
+ if (block_name_dot) {
+ /* Check if the variable name starts with the interface
+ * name. The interface name (if present) should have the same
+ * length as the interface block name we are comparing to.
+ */
+ unsigned len = strlen(name) - strlen(block_name_dot);
+ if (len != block_name_len)
+ continue;
+ }
+
+ if (strncmp(block_name, name, block_name_len) == 0) {
found_interface = true;
break;
}
@@ -3157,7 +3194,7 @@ should_add_buffer_variable(struct gl_shader_program *shProg,
* including the dot that follows it.
*/
if (found_interface)
- name = name + strlen(block_name) + 1;
+ name = name + block_name_len + 1;
/* From: ARB_program_interface_query extension:
*
@@ -3166,14 +3203,14 @@ should_add_buffer_variable(struct gl_shader_program *shProg,
* of its type. For arrays of aggregate types, the enumeration rules are
* applied recursively for the single enumerated array element.
*/
- const char *first_dot = strchr(name, '.');
+ const char *struct_first_dot = strchr(name, '.');
const char *first_square_bracket = strchr(name, '[');
/* The buffer variable is on top level and it is not an array */
if (!first_square_bracket) {
return true;
/* The shader storage block member is a struct, then generate the entry */
- } else if (first_dot && first_dot < first_square_bracket) {
+ } else if (struct_first_dot && struct_first_dot < first_square_bracket) {
return true;
} else {
/* Shader storage block member is an array, only generate an entry for the
@@ -3349,6 +3386,12 @@ add_interface_variables(struct gl_shader_program *shProg,
if (strncmp(var->name, "packed:", 7) == 0)
continue;
+ /* Skip fragdata arrays; these are handled separately
+ * by add_fragdata_arrays.
+ */
+ if (strncmp(var->name, "gl_out_FragData", 15) == 0)
+ continue;
+
if (!add_program_resource(shProg, programInterface, var,
build_stageref(shProg, var->name,
var->data.mode) | mask))
@@ -3388,6 +3431,26 @@ add_packed_varyings(struct gl_shader_program *shProg, int stage)
return true;
}
+static bool
+add_fragdata_arrays(struct gl_shader_program *shProg)
+{
+ struct gl_shader *sh = shProg->_LinkedShaders[MESA_SHADER_FRAGMENT];
+
+ if (!sh || !sh->fragdata_arrays)
+ return true;
+
+ foreach_in_list(ir_instruction, node, sh->fragdata_arrays) {
+ ir_variable *var = node->as_variable();
+ if (var) {
+ assert(var->data.mode == ir_var_shader_out);
+ if (!add_program_resource(shProg, GL_PROGRAM_OUTPUT, var,
+ 1 << MESA_SHADER_FRAGMENT))
+ return false;
+ }
+ }
+ return true;
+}
+
static char*
get_top_level_name(const char *name)
{
@@ -3467,80 +3530,78 @@ is_top_level_shader_storage_block_member(const char* name,
return result;
}
-static void
-calculate_array_size(struct gl_shader_program *shProg,
- struct gl_uniform_storage *uni)
+static int
+get_array_size(struct gl_uniform_storage *uni, const glsl_struct_field *field,
+ char *interface_name, char *var_name)
{
- int block_index = uni->block_index;
- int array_size = -1;
- char *var_name = get_top_level_name(uni->name);
- char *interface_name =
- get_top_level_name(shProg->BufferInterfaceBlocks[block_index].Name);
-
- if (strcmp(var_name, interface_name) == 0) {
- /* Deal with instanced array of SSBOs */
- char *temp_name = get_var_name(uni->name);
- free(var_name);
- var_name = get_top_level_name(temp_name);
- free(temp_name);
- }
-
- for (unsigned i = 0; i < shProg->NumShaders; i++) {
- if (shProg->Shaders[i] == NULL)
- continue;
-
- const gl_shader *stage = shProg->Shaders[i];
- foreach_in_list(ir_instruction, node, stage->ir) {
- ir_variable *var = node->as_variable();
- if (!var || !var->get_interface_type() ||
- var->data.mode != ir_var_shader_storage)
- continue;
-
- const glsl_type *interface = var->get_interface_type();
-
- if (strcmp(interface_name, interface->name) != 0)
- continue;
-
- for (unsigned i = 0; i < interface->length; i++) {
- const glsl_struct_field *field = &interface->fields.structure[i];
- if (strcmp(field->name, var_name) != 0)
- continue;
- /* From GL_ARB_program_interface_query spec:
- *
- * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer
- * identifying the number of active array elements of the top-level
- * shader storage block member containing to the active variable is
- * written to <params>. If the top-level block member is not
- * declared as an array, the value one is written to <params>. If
- * the top-level block member is an array with no declared size,
- * the value zero is written to <params>.
- */
- if (is_top_level_shader_storage_block_member(uni->name,
- interface_name,
- var_name))
- array_size = 1;
- else if (field->type->is_unsized_array())
- array_size = 0;
- else if (field->type->is_array())
- array_size = field->type->length;
- else
- array_size = 1;
+ /* From GL_ARB_program_interface_query spec:
+ *
+ * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer
+ * identifying the number of active array elements of the top-level
+ * shader storage block member containing to the active variable is
+ * written to <params>. If the top-level block member is not
+ * declared as an array, the value one is written to <params>. If
+ * the top-level block member is an array with no declared size,
+ * the value zero is written to <params>.
+ */
+ if (is_top_level_shader_storage_block_member(uni->name,
+ interface_name,
+ var_name))
+ return 1;
+ else if (field->type->is_unsized_array())
+ return 0;
+ else if (field->type->is_array())
+ return field->type->length;
+
+ return 1;
+}
- goto found_top_level_array_size;
- }
+static int
+get_array_stride(struct gl_uniform_storage *uni, const glsl_type *interface,
+ const glsl_struct_field *field, char *interface_name,
+ char *var_name)
+{
+ /* From GL_ARB_program_interface_query:
+ *
+ * "For the property TOP_LEVEL_ARRAY_STRIDE, a single integer
+ * identifying the stride between array elements of the top-level
+ * shader storage block member containing the active variable is
+ * written to <params>. For top-level block members declared as
+ * arrays, the value written is the difference, in basic machine
+ * units, between the offsets of the active variable for
+ * consecutive elements in the top-level array. For top-level
+ * block members not declared as an array, zero is written to
+ * <params>."
+ */
+ if (field->type->is_array()) {
+ const enum glsl_matrix_layout matrix_layout =
+ glsl_matrix_layout(field->matrix_layout);
+ bool row_major = matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;
+ const glsl_type *array_type = field->type->fields.array;
+
+ if (is_top_level_shader_storage_block_member(uni->name,
+ interface_name,
+ var_name))
+ return 0;
+
+ if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) {
+ if (array_type->is_record() || array_type->is_array())
+ return glsl_align(array_type->std140_size(row_major), 16);
+ else
+ return MAX2(array_type->std140_base_alignment(row_major), 16);
+ } else {
+ return array_type->std430_array_stride(row_major);
}
}
-found_top_level_array_size:
- free(interface_name);
- free(var_name);
- uni->top_level_array_size = array_size;
+ return 0;
}
static void
-calculate_array_stride(struct gl_shader_program *shProg,
- struct gl_uniform_storage *uni)
+calculate_array_size_and_stride(struct gl_shader_program *shProg,
+ struct gl_uniform_storage *uni)
{
int block_index = uni->block_index;
+ int array_size = -1;
int array_stride = -1;
char *var_name = get_top_level_name(uni->name);
char *interface_name =
@@ -3549,9 +3610,17 @@ calculate_array_stride(struct gl_shader_program *shProg,
if (strcmp(var_name, interface_name) == 0) {
/* Deal with instanced array of SSBOs */
char *temp_name = get_var_name(uni->name);
+ if (!temp_name) {
+ linker_error(shProg, "Out of memory during linking.\n");
+ goto write_top_level_array_size_and_stride;
+ }
free(var_name);
var_name = get_top_level_name(temp_name);
free(temp_name);
+ if (!var_name) {
+ linker_error(shProg, "Out of memory during linking.\n");
+ goto write_top_level_array_size_and_stride;
+ }
}
for (unsigned i = 0; i < shProg->NumShaders; i++) {
@@ -3567,61 +3636,26 @@ calculate_array_stride(struct gl_shader_program *shProg,
const glsl_type *interface = var->get_interface_type();
- if (strcmp(interface_name, interface->name) != 0) {
+ if (strcmp(interface_name, interface->name) != 0)
continue;
- }
for (unsigned i = 0; i < interface->length; i++) {
const glsl_struct_field *field = &interface->fields.structure[i];
if (strcmp(field->name, var_name) != 0)
continue;
- /* From GL_ARB_program_interface_query:
- *
- * "For the property TOP_LEVEL_ARRAY_STRIDE, a single integer
- * identifying the stride between array elements of the top-level
- * shader storage block member containing the active variable is
- * written to <params>. For top-level block members declared as
- * arrays, the value written is the difference, in basic machine
- * units, between the offsets of the active variable for
- * consecutive elements in the top-level array. For top-level
- * block members not declared as an array, zero is written to
- * <params>."
- */
- if (field->type->is_array()) {
- const enum glsl_matrix_layout matrix_layout =
- glsl_matrix_layout(field->matrix_layout);
- bool row_major = matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR;
- const glsl_type *array_type = field->type->fields.array;
-
- if (is_top_level_shader_storage_block_member(uni->name,
- interface_name,
- var_name)) {
- array_stride = 0;
- goto found_top_level_array_stride;
- }
- if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) {
- if (array_type->is_record() || array_type->is_array()) {
- array_stride = array_type->std140_size(row_major);
- array_stride = glsl_align(array_stride, 16);
- } else {
- unsigned element_base_align = 0;
- element_base_align = array_type->std140_base_alignment(row_major);
- array_stride = MAX2(element_base_align, 16);
- }
- } else {
- array_stride = array_type->std430_array_stride(row_major);
- }
- } else {
- array_stride = 0;
- }
- goto found_top_level_array_stride;
+
+ array_stride = get_array_stride(uni, interface, field,
+ interface_name, var_name);
+ array_size = get_array_size(uni, field, interface_name, var_name);
+ goto write_top_level_array_size_and_stride;
}
}
}
-found_top_level_array_stride:
+write_top_level_array_size_and_stride:
free(interface_name);
free(var_name);
uni->top_level_array_stride = array_stride;
+ uni->top_level_array_size = array_size;
}
/**
@@ -3664,6 +3698,9 @@ build_program_resource_list(struct gl_shader_program *shProg)
return;
}
+ if (!add_fragdata_arrays(shProg))
+ return;
+
/* Add inputs and outputs to the resource list. */
if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage]->ir,
GL_PROGRAM_INPUT))
@@ -3709,8 +3746,7 @@ build_program_resource_list(struct gl_shader_program *shProg)
continue;
if (is_shader_storage) {
- calculate_array_size(shProg, &shProg->UniformStorage[i]);
- calculate_array_stride(shProg, &shProg->UniformStorage[i]);
+ calculate_array_size_and_stride(shProg, &shProg->UniformStorage[i]);
}
if (!add_program_resource(shProg, type,
diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
index 129dd02781b..ba14bbbeb6a 100644
--- a/src/glsl/nir/glsl_to_nir.cpp
+++ b/src/glsl/nir/glsl_to_nir.cpp
@@ -306,6 +306,7 @@ nir_visitor::visit(ir_variable *ir)
var->data.read_only = ir->data.read_only;
var->data.centroid = ir->data.centroid;
var->data.sample = ir->data.sample;
+ var->data.patch = ir->data.patch;
var->data.invariant = ir->data.invariant;
var->data.location = ir->data.location;
@@ -396,8 +397,6 @@ nir_visitor::visit(ir_variable *ir)
var->data.index = ir->data.index;
var->data.descriptor_set = 0;
var->data.binding = ir->data.binding;
- /* XXX Get rid of buffer_index */
- var->data.atomic.buffer_index = ir->data.binding;
var->data.atomic.offset = ir->data.atomic.offset;
var->data.image.read_only = ir->data.image_read_only;
var->data.image.write_only = ir->data.image_write_only;
@@ -722,6 +721,8 @@ nir_visitor::visit(ir_call *ir)
op = nir_intrinsic_ssbo_atomic_exchange;
} else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_comp_swap_internal") == 0) {
op = nir_intrinsic_ssbo_atomic_comp_swap;
+ } else if (strcmp(ir->callee_name(), "__intrinsic_shader_clock") == 0) {
+ op = nir_intrinsic_shader_clock;
} else {
unreachable("not reached");
}
@@ -826,6 +827,10 @@ nir_visitor::visit(ir_call *ir)
case nir_intrinsic_memory_barrier:
nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
break;
+ case nir_intrinsic_shader_clock:
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+ nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr);
+ break;
case nir_intrinsic_store_ssbo: {
exec_node *param = ir->actual_parameters.get_head();
ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 793bdafb54b..5f03095d673 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -1557,12 +1557,14 @@ nir_intrinsic_from_system_value(gl_system_value val)
return nir_intrinsic_load_num_work_groups;
case SYSTEM_VALUE_PRIMITIVE_ID:
return nir_intrinsic_load_primitive_id;
- /* FINISHME: Add tessellation intrinsics.
case SYSTEM_VALUE_TESS_COORD:
- case SYSTEM_VALUE_VERTICES_IN:
+ return nir_intrinsic_load_tess_coord;
case SYSTEM_VALUE_TESS_LEVEL_OUTER:
+ return nir_intrinsic_load_tess_level_outer;
case SYSTEM_VALUE_TESS_LEVEL_INNER:
- */
+ return nir_intrinsic_load_tess_level_inner;
+ case SYSTEM_VALUE_VERTICES_IN:
+ return nir_intrinsic_load_patch_vertices_in;
default:
unreachable("system value does not directly correspond to intrinsic");
}
@@ -1598,13 +1600,14 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
return SYSTEM_VALUE_WORK_GROUP_ID;
case nir_intrinsic_load_primitive_id:
return SYSTEM_VALUE_PRIMITIVE_ID;
- /* FINISHME: Add tessellation intrinsics.
+ case nir_intrinsic_load_tess_coord:
return SYSTEM_VALUE_TESS_COORD;
- return SYSTEM_VALUE_VERTICES_IN;
- return SYSTEM_VALUE_PRIMITIVE_ID;
+ case nir_intrinsic_load_tess_level_outer:
return SYSTEM_VALUE_TESS_LEVEL_OUTER;
+ case nir_intrinsic_load_tess_level_inner:
return SYSTEM_VALUE_TESS_LEVEL_INNER;
- */
+ case nir_intrinsic_load_patch_vertices_in:
+ return SYSTEM_VALUE_VERTICES_IN;
default:
unreachable("intrinsic doesn't produce a system value");
}
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 229d534bf3d..9b278d6a767 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -171,6 +171,7 @@ typedef struct {
unsigned read_only:1;
unsigned centroid:1;
unsigned sample:1;
+ unsigned patch:1;
unsigned invariant:1;
/**
@@ -313,7 +314,6 @@ typedef struct {
* Location an atomic counter is stored at.
*/
struct {
- unsigned buffer_index;
unsigned offset;
} atomic;
@@ -2016,7 +2016,8 @@ void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
void nir_lower_two_sided_color(nir_shader *shader);
-void nir_lower_atomics(nir_shader *shader);
+void nir_lower_atomics(nir_shader *shader,
+ const struct gl_shader_program *shader_program);
void nir_lower_to_source_mods(nir_shader *shader);
bool nir_lower_gs_intrinsics(nir_shader *shader);
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index b2ceff566cf..9fd91de157f 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -83,6 +83,14 @@ BARRIER(discard)
*/
BARRIER(memory_barrier)
+/*
+ * Shader clock intrinsic with semantics analogous to the clock2x32ARB()
+ * GLSL intrinsic.
+ * The latter can be used as a code motion barrier, which is currently not
+ * feasible with NIR.
+ */
+INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+
/** A conditional discard, with a single boolean source. */
INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
@@ -217,6 +225,10 @@ SYSTEM_VALUE(sample_pos, 2, 0)
SYSTEM_VALUE(sample_mask_in, 1, 0)
SYSTEM_VALUE(primitive_id, 1, 0)
SYSTEM_VALUE(invocation_id, 1, 0)
+SYSTEM_VALUE(tess_coord, 3, 0)
+SYSTEM_VALUE(tess_level_outer, 4, 0)
+SYSTEM_VALUE(tess_level_inner, 2, 0)
+SYSTEM_VALUE(patch_vertices_in, 1, 0)
SYSTEM_VALUE(local_invocation_id, 3, 0)
SYSTEM_VALUE(work_group_id, 3, 0)
SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
diff --git a/src/glsl/nir/nir_lower_atomics.c b/src/glsl/nir/nir_lower_atomics.c
index 46e137652a1..40ca3de96cf 100644
--- a/src/glsl/nir/nir_lower_atomics.c
+++ b/src/glsl/nir/nir_lower_atomics.c
@@ -25,17 +25,24 @@
*
*/
+#include "ir_uniform.h"
#include "nir.h"
#include "main/config.h"
#include <assert.h>
+typedef struct {
+ const struct gl_shader_program *shader_program;
+ nir_shader *shader;
+} lower_atomic_state;
+
/*
* replace atomic counter intrinsics that use a variable with intrinsics
* that directly store the buffer index and byte offset
*/
static void
-lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
+lower_instr(nir_intrinsic_instr *instr,
+ lower_atomic_state *state)
{
nir_intrinsic_op op;
switch (instr->intrinsic) {
@@ -60,10 +67,11 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
return; /* atomics passed as function arguments can't be lowered */
void *mem_ctx = ralloc_parent(instr);
+ unsigned uniform_loc = instr->variables[0]->var->data.location;
nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
new_instr->const_index[0] =
- (int) instr->variables[0]->var->data.atomic.buffer_index;
+ state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index;
nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset;
@@ -132,18 +140,25 @@ lower_block(nir_block *block, void *state)
{
nir_foreach_instr_safe(block, instr) {
if (instr->type == nir_instr_type_intrinsic)
- lower_instr(nir_instr_as_intrinsic(instr), state);
+ lower_instr(nir_instr_as_intrinsic(instr),
+ (lower_atomic_state *) state);
}
return true;
}
void
-nir_lower_atomics(nir_shader *shader)
+nir_lower_atomics(nir_shader *shader,
+ const struct gl_shader_program *shader_program)
{
+ lower_atomic_state state = {
+ .shader = shader,
+ .shader_program = shader_program,
+ };
+
nir_foreach_overload(shader, overload) {
if (overload->impl) {
- nir_foreach_block(overload->impl, lower_block, overload->impl);
+ nir_foreach_block(overload->impl, lower_block, (void *) &state);
nir_metadata_preserve(overload->impl, nir_metadata_block_index |
nir_metadata_dominance);
}
diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
index f2d584fe484..3c0f1da94af 100644
--- a/src/glsl/nir/nir_opcodes.py
+++ b/src/glsl/nir/nir_opcodes.py
@@ -468,6 +468,51 @@ binop("fmax", tfloat, "", "fmaxf(src0, src1)")
binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0")
+# Saturated vector add for 4 8bit ints.
+binop("usadd_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i;
+}
+""")
+
+# Saturated vector subtract for 4 8bit ints.
+binop("ussub_4x8", tint, "", """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ int src0_chan = (src0 >> i) & 0xff;
+ int src1_chan = (src1 >> i) & 0xff;
+ if (src0_chan > src1_chan)
+ dst |= (src0_chan - src1_chan) << i;
+}
+""")
+
+# vector min for 4 8bit ints.
+binop("umin_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
+}
+""")
+
+# vector max for 4 8bit ints.
+binop("umax_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
+}
+""")
+
+# unorm multiply: (a * b) / 255.
+binop("umul_unorm_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+ int src0_chan = (src0 >> i) & 0xff;
+ int src1_chan = (src1 >> i) & 0xff;
+ dst |= ((src0_chan * src1_chan) / 255) << i;
+}
+""")
+
binop("fpow", tfloat, "", "powf(src0, src1)")
binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat,
diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py
index cafbd6d66a5..30ede52b146 100644
--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -56,12 +56,16 @@ optimizations = [
(('iabs', ('ineg', a)), ('iabs', a)),
(('fadd', a, 0.0), a),
(('iadd', a, 0), a),
+ (('usadd_4x8', a, 0), a),
+ (('usadd_4x8', a, ~0), ~0),
(('fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
(('fadd', ('fneg', a), a), 0.0),
(('iadd', ('ineg', a), a), 0),
(('fmul', a, 0.0), 0.0),
(('imul', a, 0), 0),
+ (('umul_unorm_4x8', a, 0), 0),
+ (('umul_unorm_4x8', a, ~0), a),
(('fmul', a, 1.0), a),
(('imul', a, 1), a),
(('fmul', a, -1.0), ('fneg', a)),
@@ -202,6 +206,8 @@ optimizations = [
# Subtracts
(('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
(('isub', a, ('isub', 0, b)), ('iadd', a, b)),
+ (('ussub_4x8', a, 0), a),
+ (('ussub_4x8', a, ~0), 0),
(('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'),
(('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'),
(('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index 09663996869..30220c5e48d 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -228,12 +228,13 @@ print_var_decl(nir_variable *var, print_state *state)
const char *const cent = (var->data.centroid) ? "centroid " : "";
const char *const samp = (var->data.sample) ? "sample " : "";
+ const char *const patch = (var->data.patch) ? "patch " : "";
const char *const inv = (var->data.invariant) ? "invariant " : "";
const char *const mode[] = { "shader_in ", "shader_out ", "", "",
"uniform ", "shader_storage", "system " };
- fprintf(fp, "%s%s%s%s%s ",
- cent, samp, inv, mode[var->data.mode],
+ fprintf(fp, "%s%s%s%s%s%s ",
+ cent, samp, patch, inv, mode[var->data.mode],
glsl_interp_qualifier_name(var->data.interpolation));
glsl_print_type(var->type, fp);
diff --git a/src/glsl/opt_dead_builtin_varyings.cpp b/src/glsl/opt_dead_builtin_varyings.cpp
index 31719d20c05..68b70eedf92 100644
--- a/src/glsl/opt_dead_builtin_varyings.cpp
+++ b/src/glsl/opt_dead_builtin_varyings.cpp
@@ -269,14 +269,14 @@ public:
*/
class replace_varyings_visitor : public ir_rvalue_visitor {
public:
- replace_varyings_visitor(exec_list *ir,
+ replace_varyings_visitor(struct gl_shader *sha,
const varying_info_visitor *info,
unsigned external_texcoord_usage,
unsigned external_color_usage,
bool external_has_fog)
- : info(info), new_fog(NULL)
+ : shader(sha), info(info), new_fog(NULL)
{
- void *const ctx = ir;
+ void *const ctx = shader->ir;
memset(this->new_fragdata, 0, sizeof(this->new_fragdata));
memset(this->new_texcoord, 0, sizeof(this->new_texcoord));
@@ -293,14 +293,16 @@ public:
* occurrences of gl_TexCoord will be replaced with.
*/
if (info->lower_texcoord_array) {
- prepare_array(ir, this->new_texcoord, ARRAY_SIZE(this->new_texcoord),
+ prepare_array(shader->ir, this->new_texcoord,
+ ARRAY_SIZE(this->new_texcoord),
VARYING_SLOT_TEX0, "TexCoord", mode_str,
info->texcoord_usage, external_texcoord_usage);
}
/* Handle gl_FragData in the same way like gl_TexCoord. */
if (info->lower_fragdata_array) {
- prepare_array(ir, this->new_fragdata, ARRAY_SIZE(this->new_fragdata),
+ prepare_array(shader->ir, this->new_fragdata,
+ ARRAY_SIZE(this->new_fragdata),
FRAG_RESULT_DATA0, "FragData", mode_str,
info->fragdata_usage, (1 << MAX_DRAW_BUFFERS) - 1);
}
@@ -340,7 +342,7 @@ public:
}
/* Now do the replacing. */
- visit_list_elements(this, ir);
+ visit_list_elements(this, shader->ir);
}
void prepare_array(exec_list *ir,
@@ -389,6 +391,13 @@ public:
/* Remove the gl_FragData array. */
if (this->info->lower_fragdata_array &&
var == this->info->fragdata_array) {
+
+ /* Clone variable for program resource list before it is removed. */
+ if (!shader->fragdata_arrays)
+ shader->fragdata_arrays = new (shader) exec_list;
+
+ shader->fragdata_arrays->push_tail(var->clone(shader, NULL));
+
var->remove();
}
@@ -487,6 +496,7 @@ public:
}
private:
+ struct gl_shader *shader;
const varying_info_visitor *info;
ir_variable *new_fragdata[MAX_DRAW_BUFFERS];
ir_variable *new_texcoord[MAX_TEXTURE_COORD_UNITS];
@@ -498,20 +508,20 @@ private:
} /* anonymous namespace */
static void
-lower_texcoord_array(exec_list *ir, const varying_info_visitor *info)
+lower_texcoord_array(struct gl_shader *shader, const varying_info_visitor *info)
{
- replace_varyings_visitor(ir, info,
+ replace_varyings_visitor(shader, info,
(1 << MAX_TEXTURE_COORD_UNITS) - 1,
1 | 2, true);
}
static void
-lower_fragdata_array(exec_list *ir)
+lower_fragdata_array(struct gl_shader *shader)
{
varying_info_visitor info(ir_var_shader_out, true);
- info.get(ir, 0, NULL);
+ info.get(shader->ir, 0, NULL);
- replace_varyings_visitor(ir, &info, 0, 0, 0);
+ replace_varyings_visitor(shader, &info, 0, 0, 0);
}
@@ -523,7 +533,7 @@ do_dead_builtin_varyings(struct gl_context *ctx,
{
/* Lower the gl_FragData array to separate variables. */
if (consumer && consumer->Stage == MESA_SHADER_FRAGMENT) {
- lower_fragdata_array(consumer->ir);
+ lower_fragdata_array(consumer);
}
/* Lowering of built-in varyings has no effect with the core context and
@@ -544,7 +554,7 @@ do_dead_builtin_varyings(struct gl_context *ctx,
if (!consumer) {
/* At least eliminate unused gl_TexCoord elements. */
if (producer_info.lower_texcoord_array) {
- lower_texcoord_array(producer->ir, &producer_info);
+ lower_texcoord_array(producer, &producer_info);
}
return;
}
@@ -556,7 +566,7 @@ do_dead_builtin_varyings(struct gl_context *ctx,
if (!producer) {
/* At least eliminate unused gl_TexCoord elements. */
if (consumer_info.lower_texcoord_array) {
- lower_texcoord_array(consumer->ir, &consumer_info);
+ lower_texcoord_array(consumer, &consumer_info);
}
return;
}
@@ -566,7 +576,7 @@ do_dead_builtin_varyings(struct gl_context *ctx,
if (producer_info.lower_texcoord_array ||
producer_info.color_usage ||
producer_info.has_fog) {
- replace_varyings_visitor(producer->ir,
+ replace_varyings_visitor(producer,
&producer_info,
consumer_info.texcoord_usage,
consumer_info.color_usage,
@@ -587,7 +597,7 @@ do_dead_builtin_varyings(struct gl_context *ctx,
if (consumer_info.lower_texcoord_array ||
consumer_info.color_usage ||
consumer_info.has_fog) {
- replace_varyings_visitor(consumer->ir,
+ replace_varyings_visitor(consumer,
&consumer_info,
producer_info.texcoord_usage,
producer_info.color_usage,