author | Jason Ekstrand <[email protected]> | 2015-11-03 15:45:04 -0800
committer | Jason Ekstrand <[email protected]> | 2015-11-03 15:45:04 -0800
commit | b00e3f221b3f6dd0e87697c53331fd033b6e8676 (patch)
tree | a59dfeca8fd404c65da59a663e0abda301e893a2 /src/glsl
parent | a1e7b8701a4687f29b013364a852aa773c80f960 (diff)
parent | 5d4b019d2a6d4deb4db11780618515cf1fa8a4fc (diff)
Merge remote-tracking branch 'mesa-public/master' into vulkan
Diffstat (limited to 'src/glsl')
-rw-r--r-- | src/glsl/ast_to_hir.cpp | 70
-rw-r--r-- | src/glsl/builtin_functions.cpp | 43
-rw-r--r-- | src/glsl/builtin_variables.cpp | 39
-rw-r--r-- | src/glsl/glcpp/glcpp-parse.y | 3
-rw-r--r-- | src/glsl/glsl_parser.yy | 6
-rw-r--r-- | src/glsl/glsl_parser_extras.cpp | 1
-rw-r--r-- | src/glsl/glsl_parser_extras.h | 2
-rw-r--r-- | src/glsl/link_atomics.cpp | 43
-rw-r--r-- | src/glsl/link_uniform_blocks.cpp | 22
-rw-r--r-- | src/glsl/link_uniforms.cpp | 77
-rw-r--r-- | src/glsl/linker.cpp | 268
-rw-r--r-- | src/glsl/nir/glsl_to_nir.cpp | 9
-rw-r--r-- | src/glsl/nir/nir.c | 17
-rw-r--r-- | src/glsl/nir/nir.h | 5
-rw-r--r-- | src/glsl/nir/nir_intrinsics.h | 12
-rw-r--r-- | src/glsl/nir/nir_lower_atomics.c | 25
-rw-r--r-- | src/glsl/nir/nir_opcodes.py | 45
-rw-r--r-- | src/glsl/nir/nir_opt_algebraic.py | 6
-rw-r--r-- | src/glsl/nir/nir_print.c | 5
-rw-r--r-- | src/glsl/opt_dead_builtin_varyings.cpp | 42
20 files changed, 493 insertions, 247 deletions
diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 961183636a9..0a79fb14633 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -2423,21 +2423,6 @@ validate_explicit_location(const struct ast_type_qualifier *qual, const struct gl_context *const ctx = state->ctx; unsigned max_loc = qual->location + var->type->uniform_locations() - 1; - /* ARB_explicit_uniform_location specification states: - * - * "The explicitly defined locations and the generated locations - * must be in the range of 0 to MAX_UNIFORM_LOCATIONS minus one." - * - * "Valid locations for default-block uniform variable locations - * are in the range of 0 to the implementation-defined maximum - * number of uniform locations." - */ - if (qual->location < 0) { - _mesa_glsl_error(loc, state, - "explicit location < 0 for uniform %s", var->name); - return; - } - if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) { _mesa_glsl_error(loc, state, "location(s) consumed by uniform %s " ">= MAX_UNIFORM_LOCATIONS (%u)", var->name, @@ -2528,41 +2513,30 @@ validate_explicit_location(const struct ast_type_qualifier *qual, } else { var->data.explicit_location = true; - /* This bit of silliness is needed because invalid explicit locations - * are supposed to be flagged during linking. Small negative values - * biased by VERT_ATTRIB_GENERIC0 or FRAG_RESULT_DATA0 could alias - * built-in values (e.g., -16+VERT_ATTRIB_GENERIC0 = VERT_ATTRIB_POS). - * The linker needs to be able to differentiate these cases. This - * ensures that negative values stay negative. - */ - if (qual->location >= 0) { - switch (state->stage) { - case MESA_SHADER_VERTEX: - var->data.location = (var->data.mode == ir_var_shader_in) - ? (qual->location + VERT_ATTRIB_GENERIC0) - : (qual->location + VARYING_SLOT_VAR0); - break; + switch (state->stage) { + case MESA_SHADER_VERTEX: + var->data.location = (var->data.mode == ir_var_shader_in) + ? (qual->location + VERT_ATTRIB_GENERIC0) + : (qual->location + VARYING_SLOT_VAR0); + break; - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_TESS_EVAL: - case MESA_SHADER_GEOMETRY: - if (var->data.patch) - var->data.location = qual->location + VARYING_SLOT_PATCH0; - else - var->data.location = qual->location + VARYING_SLOT_VAR0; - break; + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_GEOMETRY: + if (var->data.patch) + var->data.location = qual->location + VARYING_SLOT_PATCH0; + else + var->data.location = qual->location + VARYING_SLOT_VAR0; + break; - case MESA_SHADER_FRAGMENT: - var->data.location = (var->data.mode == ir_var_shader_out) - ? (qual->location + FRAG_RESULT_DATA0) - : (qual->location + VARYING_SLOT_VAR0); - break; - case MESA_SHADER_COMPUTE: - assert(!"Unexpected shader type"); - break; - } - } else { - var->data.location = qual->location; + case MESA_SHADER_FRAGMENT: + var->data.location = (var->data.mode == ir_var_shader_out) + ? 
(qual->location + FRAG_RESULT_DATA0) + : (qual->location + VARYING_SLOT_VAR0); + break; + case MESA_SHADER_COMPUTE: + assert(!"Unexpected shader type"); + break; } if (qual->flags.q.explicit_index) { diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp index aae25f893e8..509a57b8813 100644 --- a/src/glsl/builtin_functions.cpp +++ b/src/glsl/builtin_functions.cpp @@ -401,6 +401,12 @@ shader_atomic_counters(const _mesa_glsl_parse_state *state) } static bool +shader_clock(const _mesa_glsl_parse_state *state) +{ + return state->ARB_shader_clock_enable; +} + +static bool shader_storage_buffer_object(const _mesa_glsl_parse_state *state) { return state->has_shader_storage_buffer_objects(); @@ -782,6 +788,11 @@ private: ir_function_signature *_memory_barrier( builtin_available_predicate avail); + ir_function_signature *_shader_clock_intrinsic(builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_shader_clock(builtin_available_predicate avail, + const glsl_type *type); + #undef B0 #undef B1 #undef B2 @@ -952,6 +963,11 @@ builtin_builder::create_intrinsics() add_function("__intrinsic_memory_barrier", _memory_barrier_intrinsic(shader_image_load_store), NULL); + + add_function("__intrinsic_shader_clock", + _shader_clock_intrinsic(shader_clock, + glsl_type::uvec2_type), + NULL); } /** @@ -2741,6 +2757,11 @@ builtin_builder::create_builtins() _memory_barrier(shader_image_load_store), NULL); + add_function("clock2x32ARB", + _shader_clock(shader_clock, + glsl_type::uvec2_type), + NULL); + #undef F #undef FI #undef FIUD @@ -5251,6 +5272,28 @@ builtin_builder::_memory_barrier(builtin_available_predicate avail) return sig; } +ir_function_signature * +builtin_builder::_shader_clock_intrinsic(builtin_available_predicate avail, + const glsl_type *type) +{ + MAKE_INTRINSIC(type, avail, 0); + return sig; +} + +ir_function_signature * +builtin_builder::_shader_clock(builtin_available_predicate avail, + const glsl_type *type) +{ + MAKE_SIG(type, avail, 0); + + ir_variable *retval = body.make_temp(type, "clock_retval"); + + body.emit(call(shader->symbols->get_function("__intrinsic_shader_clock"), + retval, sig->parameters)); + body.emit(ret(retval)); + return sig; +} + /** @} */ /******************************************************************************/ diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp index a6ad1050552..c30fb9226e5 100644 --- a/src/glsl/builtin_variables.cpp +++ b/src/glsl/builtin_variables.cpp @@ -710,7 +710,7 @@ builtin_variable_generator::generate_constants() } } - if (state->is_version(430, 0) || state->ARB_compute_shader_enable) { + if (state->is_version(430, 310) || state->ARB_compute_shader_enable) { add_const("gl_MaxComputeAtomicCounterBuffers", MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS); add_const("gl_MaxComputeAtomicCounters", MAX_COMPUTE_ATOMIC_COUNTERS); add_const("gl_MaxComputeImageUniforms", MAX_COMPUTE_IMAGE_UNIFORMS); @@ -887,16 +887,22 @@ builtin_variable_generator::generate_uniforms() void builtin_variable_generator::generate_vs_special_vars() { + ir_variable *var; + if (state->is_version(130, 300)) add_system_value(SYSTEM_VALUE_VERTEX_ID, int_t, "gl_VertexID"); if (state->ARB_draw_instanced_enable) add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceIDARB"); if (state->ARB_draw_instanced_enable || state->is_version(140, 300)) add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceID"); - if (state->AMD_vertex_shader_layer_enable) - add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer"); 
- if (state->AMD_vertex_shader_viewport_index_enable) - add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + if (state->AMD_vertex_shader_layer_enable) { + var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + } + if (state->AMD_vertex_shader_viewport_index_enable) { + var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + } if (compatibility) { add_input(VERT_ATTRIB_POS, vec4_t, "gl_Vertex"); add_input(VERT_ATTRIB_NORMAL, vec3_t, "gl_Normal"); @@ -954,9 +960,14 @@ builtin_variable_generator::generate_tes_special_vars() void builtin_variable_generator::generate_gs_special_vars() { - add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer"); - if (state->is_version(410, 0) || state->ARB_viewport_array_enable) - add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + ir_variable *var; + + var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + if (state->is_version(410, 0) || state->ARB_viewport_array_enable) { + var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + } if (state->is_version(400, 0) || state->ARB_gpu_shader5_enable) add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, "gl_InvocationID"); @@ -970,7 +981,6 @@ builtin_variable_generator::generate_gs_special_vars() * the specific case of gl_PrimitiveIDIn. So we don't need to treat * gl_PrimitiveIDIn as an {ARB,EXT}_geometry_shader4-only variable. */ - ir_variable *var; var = add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveIDIn"); var->data.interpolation = INTERP_QUALIFIER_FLAT; var = add_output(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); @@ -984,14 +994,15 @@ builtin_variable_generator::generate_gs_special_vars() void builtin_variable_generator::generate_fs_special_vars() { + ir_variable *var; + add_input(VARYING_SLOT_POS, vec4_t, "gl_FragCoord"); add_input(VARYING_SLOT_FACE, bool_t, "gl_FrontFacing"); if (state->is_version(120, 100)) add_input(VARYING_SLOT_PNTC, vec2_t, "gl_PointCoord"); if (state->is_version(150, 0)) { - ir_variable *var = - add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); + var = add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); var->data.interpolation = INTERP_QUALIFIER_FLAT; } @@ -1043,8 +1054,10 @@ builtin_variable_generator::generate_fs_special_vars() } if (state->is_version(430, 0) || state->ARB_fragment_layer_viewport_enable) { - add_input(VARYING_SLOT_LAYER, int_t, "gl_Layer"); - add_input(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + var = add_input(VARYING_SLOT_LAYER, int_t, "gl_Layer"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + var = add_input(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; } } diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y index 1d7a3af8b74..4acccf74065 100644 --- a/src/glsl/glcpp/glcpp-parse.y +++ b/src/glsl/glcpp/glcpp-parse.y @@ -2426,6 +2426,9 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio if (extensions->ARB_shader_bit_encoding) add_builtin_define(parser, "GL_ARB_shader_bit_encoding", 1); + if (extensions->ARB_shader_clock) + add_builtin_define(parser, "GL_ARB_shader_clock", 1); + if (extensions->ARB_uniform_buffer_object) add_builtin_define(parser, "GL_ARB_uniform_buffer_object", 1); diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy index 
2f2e10d7992..4636435f191 100644 --- a/src/glsl/glsl_parser.yy +++ b/src/glsl/glsl_parser.yy @@ -948,7 +948,8 @@ parameter_qualifier: if ($2.precision != ast_precision_none) _mesa_glsl_error(&@1, state, "duplicate precision qualifier"); - if (!state->has_420pack() && $2.flags.i != 0) + if (!(state->has_420pack() || state->is_version(420, 310)) && + $2.flags.i != 0) _mesa_glsl_error(&@1, state, "precision qualifiers must come last"); $$ = $2; @@ -1847,7 +1848,8 @@ type_qualifier: if ($2.precision != ast_precision_none) _mesa_glsl_error(&@1, state, "duplicate precision qualifier"); - if (!state->has_420pack() && $2.flags.i != 0) + if (!(state->has_420pack() || state->is_version(420, 310)) && + $2.flags.i != 0) _mesa_glsl_error(&@1, state, "precision qualifiers must come last"); $$ = $2; diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 692b1228ee9..f856a200e09 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -606,6 +606,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(ARB_separate_shader_objects, true, false, dummy_true), EXT(ARB_shader_atomic_counters, true, false, ARB_shader_atomic_counters), EXT(ARB_shader_bit_encoding, true, false, ARB_shader_bit_encoding), + EXT(ARB_shader_clock, true, false, ARB_shader_clock), EXT(ARB_shader_image_load_store, true, false, ARB_shader_image_load_store), EXT(ARB_shader_image_size, true, false, ARB_shader_image_size), EXT(ARB_shader_precision, true, false, ARB_shader_precision), diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index e8740f9ecb9..b54c5359149 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -519,6 +519,8 @@ struct _mesa_glsl_parse_state { bool ARB_shader_atomic_counters_warn; bool ARB_shader_bit_encoding_enable; bool ARB_shader_bit_encoding_warn; + bool ARB_shader_clock_enable; + bool ARB_shader_clock_warn; bool ARB_shader_image_load_store_enable; bool ARB_shader_image_load_store_warn; bool ARB_shader_image_size_enable; diff --git a/src/glsl/link_atomics.cpp b/src/glsl/link_atomics.cpp index 70ef0e1c891..cdcc06d53e2 100644 --- a/src/glsl/link_atomics.cpp +++ b/src/glsl/link_atomics.cpp @@ -198,6 +198,7 @@ link_assign_atomic_counter_resources(struct gl_context *ctx, struct gl_shader_program *prog) { unsigned num_buffers; + unsigned num_atomic_buffers[MESA_SHADER_STAGES] = {}; active_atomic_buffer *abs = find_active_atomic_counters(ctx, prog, &num_buffers); @@ -242,13 +243,49 @@ link_assign_atomic_counter_resources(struct gl_context *ctx, } /* Assign stage-specific fields. */ - for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) - mab.StageReferences[j] = - (ab.stage_references[j] ? GL_TRUE : GL_FALSE); + for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) { + if (ab.stage_references[j]) { + mab.StageReferences[j] = GL_TRUE; + num_atomic_buffers[j]++; + } else { + mab.StageReferences[j] = GL_FALSE; + } + } i++; } + /* Store a list pointers to atomic buffers per stage and store the index + * to the intra-stage buffer list in uniform storage. 
+ */ + for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) { + if (prog->_LinkedShaders[j] && num_atomic_buffers[j] > 0) { + prog->_LinkedShaders[j]->NumAtomicBuffers = num_atomic_buffers[j]; + prog->_LinkedShaders[j]->AtomicBuffers = + rzalloc_array(prog, gl_active_atomic_buffer *, + num_atomic_buffers[j]); + + unsigned intra_stage_idx = 0; + for (unsigned i = 0; i < num_buffers; i++) { + struct gl_active_atomic_buffer *atomic_buffer = + &prog->AtomicBuffers[i]; + if (atomic_buffer->StageReferences[j]) { + prog->_LinkedShaders[j]->AtomicBuffers[intra_stage_idx] = + atomic_buffer; + + for (unsigned u = 0; u < atomic_buffer->NumUniforms; u++) { + prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].index = + intra_stage_idx; + prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].active = + true; + } + + intra_stage_idx++; + } + } + } + } + delete [] abs; assert(i == num_buffers); } diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp index 5285d8d01e4..d5d30bb0a0d 100644 --- a/src/glsl/link_uniform_blocks.cpp +++ b/src/glsl/link_uniform_blocks.cpp @@ -100,7 +100,7 @@ private: virtual void visit_field(const glsl_type *type, const char *name, bool row_major, const glsl_type *, const unsigned packing, - bool /* last_field */) + bool last_field) { assert(this->index < this->num_variables); @@ -131,12 +131,28 @@ private: unsigned alignment = 0; unsigned size = 0; + /* From ARB_program_interface_query: + * + * "If the final member of an active shader storage block is array + * with no declared size, the minimum buffer size is computed + * assuming the array was declared as an array with one element." + * + * For that reason, we use the base type of the unsized array to calculate + * its size. We don't need to check if the unsized array is the last member + * of a shader storage block (that check was already done by the parser). + */ + const glsl_type *type_for_size = type; + if (type->is_unsized_array()) { + assert(last_field); + type_for_size = type->without_array(); + } + if (packing == GLSL_INTERFACE_PACKING_STD430) { alignment = type->std430_base_alignment(v->RowMajor); - size = type->std430_size(v->RowMajor); + size = type_for_size->std430_size(v->RowMajor); } else { alignment = type->std140_base_alignment(v->RowMajor); - size = type->std140_size(v->RowMajor); + size = type_for_size->std140_size(v->RowMajor); } this->offset = glsl_align(this->offset, alignment); diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp index 8183e65d2f5..47bb7717f84 100644 --- a/src/glsl/link_uniforms.cpp +++ b/src/glsl/link_uniforms.cpp @@ -1010,38 +1010,37 @@ link_update_uniform_buffer_variables(struct gl_shader *shader) } } -/** - * Scan the program for image uniforms and store image unit access - * information into the gl_shader data structure. 
- */ static void -link_set_image_access_qualifiers(struct gl_shader_program *prog) +link_set_image_access_qualifiers(struct gl_shader_program *prog, + gl_shader *sh, unsigned shader_stage, + ir_variable *var, const glsl_type *type, + char **name, size_t name_length) { - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - gl_shader *sh = prog->_LinkedShaders[i]; - - if (sh == NULL) - continue; + /* Handle arrays of arrays */ + if (type->is_array() && type->fields.array->is_array()) { + for (unsigned i = 0; i < type->length; i++) { + size_t new_length = name_length; - foreach_in_list(ir_instruction, node, sh->ir) { - ir_variable *var = node->as_variable(); + /* Append the subscript to the current variable name */ + ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i); - if (var && var->data.mode == ir_var_uniform && - var->type->contains_image()) { - unsigned id = 0; - bool found = prog->UniformHash->get(id, var->name); - assert(found); - (void) found; - const gl_uniform_storage *storage = &prog->UniformStorage[id]; - const unsigned index = storage->opaque[i].index; - const GLenum access = (var->data.image_read_only ? GL_READ_ONLY : - var->data.image_write_only ? GL_WRITE_ONLY : - GL_READ_WRITE); - - for (unsigned j = 0; j < MAX2(1, storage->array_elements); ++j) - sh->ImageAccess[index + j] = access; - } + link_set_image_access_qualifiers(prog, sh, shader_stage, var, + type->fields.array, name, + new_length); } + } else { + unsigned id = 0; + bool found = prog->UniformHash->get(id, *name); + assert(found); + (void) found; + const gl_uniform_storage *storage = &prog->UniformStorage[id]; + const unsigned index = storage->opaque[shader_stage].index; + const GLenum access = (var->data.image_read_only ? GL_READ_ONLY : + var->data.image_write_only ? GL_WRITE_ONLY : + GL_READ_WRITE); + + for (unsigned j = 0; j < MAX2(1, storage->array_elements); ++j) + sh->ImageAccess[index + j] = access; } } @@ -1305,7 +1304,29 @@ link_assign_uniform_locations(struct gl_shader_program *prog, prog->NumHiddenUniforms = hidden_uniforms; prog->UniformStorage = uniforms; - link_set_image_access_qualifiers(prog); + /** + * Scan the program for image uniforms and store image unit access + * information into the gl_shader data structure. + */ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + gl_shader *sh = prog->_LinkedShaders[i]; + + if (sh == NULL) + continue; + + foreach_in_list(ir_instruction, node, sh->ir) { + ir_variable *var = node->as_variable(); + + if (var && var->data.mode == ir_var_uniform && + var->type->contains_image()) { + char *name_copy = ralloc_strdup(NULL, var->name); + link_set_image_access_qualifiers(prog, sh, i, var, var->type, + &name_copy, strlen(var->name)); + ralloc_free(name_copy); + } + } + } + link_set_uniform_initializers(prog, boolean_true); return; diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 07ea0e0c7e5..c35d87acea6 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -2282,6 +2282,22 @@ resize_tes_inputs(struct gl_context *ctx, foreach_in_list(ir_instruction, ir, tes->ir) { ir->accept(&input_resize_visitor); } + + if (tcs) { + /* Convert the gl_PatchVerticesIn system value into a constant, since + * the value is known at this point. 
+ */ + foreach_in_list(ir_instruction, ir, tes->ir) { + ir_variable *var = ir->as_variable(); + if (var && var->data.mode == ir_var_system_value && + var->data.location == SYSTEM_VALUE_VERTICES_IN) { + void *mem_ctx = ralloc_parent(var); + var->data.mode = ir_var_auto; + var->data.location = 0; + var->constant_value = new(mem_ctx) ir_constant(num_vertices); + } + } + } } /** @@ -3137,7 +3153,8 @@ should_add_buffer_variable(struct gl_shader_program *shProg, GLenum type, const char *name) { bool found_interface = false; - const char *block_name = NULL; + unsigned block_name_len = 0; + const char *block_name_dot = strchr(name, '.'); /* These rules only apply to buffer variables. So we return * true for the rest of types. @@ -3146,8 +3163,28 @@ should_add_buffer_variable(struct gl_shader_program *shProg, return true; for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) { - block_name = shProg->BufferInterfaceBlocks[i].Name; - if (strncmp(block_name, name, strlen(block_name)) == 0) { + const char *block_name = shProg->BufferInterfaceBlocks[i].Name; + block_name_len = strlen(block_name); + + const char *block_square_bracket = strchr(block_name, '['); + if (block_square_bracket) { + /* The block is part of an array of named interfaces, + * for the name comparison we ignore the "[x]" part. + */ + block_name_len -= strlen(block_square_bracket); + } + + if (block_name_dot) { + /* Check if the variable name starts with the interface + * name. The interface name (if present) should have the + * length than the interface block name we are comparing to. + */ + unsigned len = strlen(name) - strlen(block_name_dot); + if (len != block_name_len) + continue; + } + + if (strncmp(block_name, name, block_name_len) == 0) { found_interface = true; break; } @@ -3157,7 +3194,7 @@ should_add_buffer_variable(struct gl_shader_program *shProg, * including the dot that follows it. */ if (found_interface) - name = name + strlen(block_name) + 1; + name = name + block_name_len + 1; /* From: ARB_program_interface_query extension: * @@ -3166,14 +3203,14 @@ should_add_buffer_variable(struct gl_shader_program *shProg, * of its type. For arrays of aggregate types, the enumeration rules are * applied recursively for the single enumerated array element. */ - const char *first_dot = strchr(name, '.'); + const char *struct_first_dot = strchr(name, '.'); const char *first_square_bracket = strchr(name, '['); /* The buffer variable is on top level and it is not an array */ if (!first_square_bracket) { return true; /* The shader storage block member is a struct, then generate the entry */ - } else if (first_dot && first_dot < first_square_bracket) { + } else if (struct_first_dot && struct_first_dot < first_square_bracket) { return true; } else { /* Shader storage block member is an array, only generate an entry for the @@ -3349,6 +3386,12 @@ add_interface_variables(struct gl_shader_program *shProg, if (strncmp(var->name, "packed:", 7) == 0) continue; + /* Skip fragdata arrays, these are handled separately + * by add_fragdata_arrays. 
+ */ + if (strncmp(var->name, "gl_out_FragData", 15) == 0) + continue; + if (!add_program_resource(shProg, programInterface, var, build_stageref(shProg, var->name, var->data.mode) | mask)) @@ -3388,6 +3431,26 @@ add_packed_varyings(struct gl_shader_program *shProg, int stage) return true; } +static bool +add_fragdata_arrays(struct gl_shader_program *shProg) +{ + struct gl_shader *sh = shProg->_LinkedShaders[MESA_SHADER_FRAGMENT]; + + if (!sh || !sh->fragdata_arrays) + return true; + + foreach_in_list(ir_instruction, node, sh->fragdata_arrays) { + ir_variable *var = node->as_variable(); + if (var) { + assert(var->data.mode == ir_var_shader_out); + if (!add_program_resource(shProg, GL_PROGRAM_OUTPUT, var, + 1 << MESA_SHADER_FRAGMENT)) + return false; + } + } + return true; +} + static char* get_top_level_name(const char *name) { @@ -3467,80 +3530,78 @@ is_top_level_shader_storage_block_member(const char* name, return result; } -static void -calculate_array_size(struct gl_shader_program *shProg, - struct gl_uniform_storage *uni) +static int +get_array_size(struct gl_uniform_storage *uni, const glsl_struct_field *field, + char *interface_name, char *var_name) { - int block_index = uni->block_index; - int array_size = -1; - char *var_name = get_top_level_name(uni->name); - char *interface_name = - get_top_level_name(shProg->BufferInterfaceBlocks[block_index].Name); - - if (strcmp(var_name, interface_name) == 0) { - /* Deal with instanced array of SSBOs */ - char *temp_name = get_var_name(uni->name); - free(var_name); - var_name = get_top_level_name(temp_name); - free(temp_name); - } - - for (unsigned i = 0; i < shProg->NumShaders; i++) { - if (shProg->Shaders[i] == NULL) - continue; - - const gl_shader *stage = shProg->Shaders[i]; - foreach_in_list(ir_instruction, node, stage->ir) { - ir_variable *var = node->as_variable(); - if (!var || !var->get_interface_type() || - var->data.mode != ir_var_shader_storage) - continue; - - const glsl_type *interface = var->get_interface_type(); - - if (strcmp(interface_name, interface->name) != 0) - continue; - - for (unsigned i = 0; i < interface->length; i++) { - const glsl_struct_field *field = &interface->fields.structure[i]; - if (strcmp(field->name, var_name) != 0) - continue; - /* From GL_ARB_program_interface_query spec: - * - * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer - * identifying the number of active array elements of the top-level - * shader storage block member containing to the active variable is - * written to <params>. If the top-level block member is not - * declared as an array, the value one is written to <params>. If - * the top-level block member is an array with no declared size, - * the value zero is written to <params>. - */ - if (is_top_level_shader_storage_block_member(uni->name, - interface_name, - var_name)) - array_size = 1; - else if (field->type->is_unsized_array()) - array_size = 0; - else if (field->type->is_array()) - array_size = field->type->length; - else - array_size = 1; + /* From GL_ARB_program_interface_query spec: + * + * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer + * identifying the number of active array elements of the top-level + * shader storage block member containing to the active variable is + * written to <params>. If the top-level block member is not + * declared as an array, the value one is written to <params>. If + * the top-level block member is an array with no declared size, + * the value zero is written to <params>. 
+ */ + if (is_top_level_shader_storage_block_member(uni->name, + interface_name, + var_name)) + return 1; + else if (field->type->is_unsized_array()) + return 0; + else if (field->type->is_array()) + return field->type->length; + + return 1; +} - goto found_top_level_array_size; - } +static int +get_array_stride(struct gl_uniform_storage *uni, const glsl_type *interface, + const glsl_struct_field *field, char *interface_name, + char *var_name) +{ + /* From GL_ARB_program_interface_query: + * + * "For the property TOP_LEVEL_ARRAY_STRIDE, a single integer + * identifying the stride between array elements of the top-level + * shader storage block member containing the active variable is + * written to <params>. For top-level block members declared as + * arrays, the value written is the difference, in basic machine + * units, between the offsets of the active variable for + * consecutive elements in the top-level array. For top-level + * block members not declared as an array, zero is written to + * <params>." + */ + if (field->type->is_array()) { + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(field->matrix_layout); + bool row_major = matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR; + const glsl_type *array_type = field->type->fields.array; + + if (is_top_level_shader_storage_block_member(uni->name, + interface_name, + var_name)) + return 0; + + if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) { + if (array_type->is_record() || array_type->is_array()) + return glsl_align(array_type->std140_size(row_major), 16); + else + return MAX2(array_type->std140_base_alignment(row_major), 16); + } else { + return array_type->std430_array_stride(row_major); } } -found_top_level_array_size: - free(interface_name); - free(var_name); - uni->top_level_array_size = array_size; + return 0; } static void -calculate_array_stride(struct gl_shader_program *shProg, - struct gl_uniform_storage *uni) +calculate_array_size_and_stride(struct gl_shader_program *shProg, + struct gl_uniform_storage *uni) { int block_index = uni->block_index; + int array_size = -1; int array_stride = -1; char *var_name = get_top_level_name(uni->name); char *interface_name = @@ -3549,9 +3610,17 @@ calculate_array_stride(struct gl_shader_program *shProg, if (strcmp(var_name, interface_name) == 0) { /* Deal with instanced array of SSBOs */ char *temp_name = get_var_name(uni->name); + if (!temp_name) { + linker_error(shProg, "Out of memory during linking.\n"); + goto write_top_level_array_size_and_stride; + } free(var_name); var_name = get_top_level_name(temp_name); free(temp_name); + if (!var_name) { + linker_error(shProg, "Out of memory during linking.\n"); + goto write_top_level_array_size_and_stride; + } } for (unsigned i = 0; i < shProg->NumShaders; i++) { @@ -3567,61 +3636,26 @@ calculate_array_stride(struct gl_shader_program *shProg, const glsl_type *interface = var->get_interface_type(); - if (strcmp(interface_name, interface->name) != 0) { + if (strcmp(interface_name, interface->name) != 0) continue; - } for (unsigned i = 0; i < interface->length; i++) { const glsl_struct_field *field = &interface->fields.structure[i]; if (strcmp(field->name, var_name) != 0) continue; - /* From GL_ARB_program_interface_query: - * - * "For the property TOP_LEVEL_ARRAY_STRIDE, a single integer - * identifying the stride between array elements of the top-level - * shader storage block member containing the active variable is - * written to <params>. 
For top-level block members declared as - * arrays, the value written is the difference, in basic machine - * units, between the offsets of the active variable for - * consecutive elements in the top-level array. For top-level - * block members not declared as an array, zero is written to - * <params>." - */ - if (field->type->is_array()) { - const enum glsl_matrix_layout matrix_layout = - glsl_matrix_layout(field->matrix_layout); - bool row_major = matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR; - const glsl_type *array_type = field->type->fields.array; - - if (is_top_level_shader_storage_block_member(uni->name, - interface_name, - var_name)) { - array_stride = 0; - goto found_top_level_array_stride; - } - if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) { - if (array_type->is_record() || array_type->is_array()) { - array_stride = array_type->std140_size(row_major); - array_stride = glsl_align(array_stride, 16); - } else { - unsigned element_base_align = 0; - element_base_align = array_type->std140_base_alignment(row_major); - array_stride = MAX2(element_base_align, 16); - } - } else { - array_stride = array_type->std430_array_stride(row_major); - } - } else { - array_stride = 0; - } - goto found_top_level_array_stride; + + array_stride = get_array_stride(uni, interface, field, + interface_name, var_name); + array_size = get_array_size(uni, field, interface_name, var_name); + goto write_top_level_array_size_and_stride; } } } -found_top_level_array_stride: +write_top_level_array_size_and_stride: free(interface_name); free(var_name); uni->top_level_array_stride = array_stride; + uni->top_level_array_size = array_size; } /** @@ -3664,6 +3698,9 @@ build_program_resource_list(struct gl_shader_program *shProg) return; } + if (!add_fragdata_arrays(shProg)) + return; + /* Add inputs and outputs to the resource list. 
*/ if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage]->ir, GL_PROGRAM_INPUT)) @@ -3709,8 +3746,7 @@ build_program_resource_list(struct gl_shader_program *shProg) continue; if (is_shader_storage) { - calculate_array_size(shProg, &shProg->UniformStorage[i]); - calculate_array_stride(shProg, &shProg->UniformStorage[i]); + calculate_array_size_and_stride(shProg, &shProg->UniformStorage[i]); } if (!add_program_resource(shProg, type, diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index 129dd02781b..ba14bbbeb6a 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -306,6 +306,7 @@ nir_visitor::visit(ir_variable *ir) var->data.read_only = ir->data.read_only; var->data.centroid = ir->data.centroid; var->data.sample = ir->data.sample; + var->data.patch = ir->data.patch; var->data.invariant = ir->data.invariant; var->data.location = ir->data.location; @@ -396,8 +397,6 @@ nir_visitor::visit(ir_variable *ir) var->data.index = ir->data.index; var->data.descriptor_set = 0; var->data.binding = ir->data.binding; - /* XXX Get rid of buffer_index */ - var->data.atomic.buffer_index = ir->data.binding; var->data.atomic.offset = ir->data.atomic.offset; var->data.image.read_only = ir->data.image_read_only; var->data.image.write_only = ir->data.image_write_only; @@ -722,6 +721,8 @@ nir_visitor::visit(ir_call *ir) op = nir_intrinsic_ssbo_atomic_exchange; } else if (strcmp(ir->callee_name(), "__intrinsic_ssbo_atomic_comp_swap_internal") == 0) { op = nir_intrinsic_ssbo_atomic_comp_swap; + } else if (strcmp(ir->callee_name(), "__intrinsic_shader_clock") == 0) { + op = nir_intrinsic_shader_clock; } else { unreachable("not reached"); } @@ -826,6 +827,10 @@ nir_visitor::visit(ir_call *ir) case nir_intrinsic_memory_barrier: nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); break; + case nir_intrinsic_shader_clock: + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_instr_insert_after_cf_list(this->cf_node_list, &instr->instr); + break; case nir_intrinsic_store_ssbo: { exec_node *param = ir->actual_parameters.get_head(); ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 793bdafb54b..5f03095d673 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -1557,12 +1557,14 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_num_work_groups; case SYSTEM_VALUE_PRIMITIVE_ID: return nir_intrinsic_load_primitive_id; - /* FINISHME: Add tessellation intrinsics. case SYSTEM_VALUE_TESS_COORD: - case SYSTEM_VALUE_VERTICES_IN: + return nir_intrinsic_load_tess_coord; case SYSTEM_VALUE_TESS_LEVEL_OUTER: + return nir_intrinsic_load_tess_level_outer; case SYSTEM_VALUE_TESS_LEVEL_INNER: - */ + return nir_intrinsic_load_tess_level_inner; + case SYSTEM_VALUE_VERTICES_IN: + return nir_intrinsic_load_patch_vertices_in; default: unreachable("system value does not directly correspond to intrinsic"); } @@ -1598,13 +1600,14 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) return SYSTEM_VALUE_WORK_GROUP_ID; case nir_intrinsic_load_primitive_id: return SYSTEM_VALUE_PRIMITIVE_ID; - /* FINISHME: Add tessellation intrinsics. 
+ case nir_intrinsic_load_tess_coord: return SYSTEM_VALUE_TESS_COORD; - return SYSTEM_VALUE_VERTICES_IN; - return SYSTEM_VALUE_PRIMITIVE_ID; + case nir_intrinsic_load_tess_level_outer: return SYSTEM_VALUE_TESS_LEVEL_OUTER; + case nir_intrinsic_load_tess_level_inner: return SYSTEM_VALUE_TESS_LEVEL_INNER; - */ + case nir_intrinsic_load_patch_vertices_in: + return SYSTEM_VALUE_VERTICES_IN; default: unreachable("intrinsic doesn't produce a system value"); } diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 229d534bf3d..9b278d6a767 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -171,6 +171,7 @@ typedef struct { unsigned read_only:1; unsigned centroid:1; unsigned sample:1; + unsigned patch:1; unsigned invariant:1; /** @@ -313,7 +314,6 @@ typedef struct { * Location an atomic counter is stored at. */ struct { - unsigned buffer_index; unsigned offset; } atomic; @@ -2016,7 +2016,8 @@ void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables); void nir_lower_two_sided_color(nir_shader *shader); -void nir_lower_atomics(nir_shader *shader); +void nir_lower_atomics(nir_shader *shader, + const struct gl_shader_program *shader_program); void nir_lower_to_source_mods(nir_shader *shader); bool nir_lower_gs_intrinsics(nir_shader *shader); diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index b2ceff566cf..9fd91de157f 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -83,6 +83,14 @@ BARRIER(discard) */ BARRIER(memory_barrier) +/* + * Shader clock intrinsic with semantics analogous to the clock2x32ARB() + * GLSL intrinsic. + * The latter can be used as code motion barrier, which is currently not + * feasible with NIR. + */ +INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE) + /** A conditional discard, with a single boolean source. 
*/ INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0) @@ -217,6 +225,10 @@ SYSTEM_VALUE(sample_pos, 2, 0) SYSTEM_VALUE(sample_mask_in, 1, 0) SYSTEM_VALUE(primitive_id, 1, 0) SYSTEM_VALUE(invocation_id, 1, 0) +SYSTEM_VALUE(tess_coord, 3, 0) +SYSTEM_VALUE(tess_level_outer, 4, 0) +SYSTEM_VALUE(tess_level_inner, 2, 0) +SYSTEM_VALUE(patch_vertices_in, 1, 0) SYSTEM_VALUE(local_invocation_id, 3, 0) SYSTEM_VALUE(work_group_id, 3, 0) SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */ diff --git a/src/glsl/nir/nir_lower_atomics.c b/src/glsl/nir/nir_lower_atomics.c index 46e137652a1..40ca3de96cf 100644 --- a/src/glsl/nir/nir_lower_atomics.c +++ b/src/glsl/nir/nir_lower_atomics.c @@ -25,17 +25,24 @@ * */ +#include "ir_uniform.h" #include "nir.h" #include "main/config.h" #include <assert.h> +typedef struct { + const struct gl_shader_program *shader_program; + nir_shader *shader; +} lower_atomic_state; + /* * replace atomic counter intrinsics that use a variable with intrinsics * that directly store the buffer index and byte offset */ static void -lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl) +lower_instr(nir_intrinsic_instr *instr, + lower_atomic_state *state) { nir_intrinsic_op op; switch (instr->intrinsic) { @@ -60,10 +67,11 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl) return; /* atomics passed as function arguments can't be lowered */ void *mem_ctx = ralloc_parent(instr); + unsigned uniform_loc = instr->variables[0]->var->data.location; nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op); new_instr->const_index[0] = - (int) instr->variables[0]->var->data.atomic.buffer_index; + state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index; nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1); offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset; @@ -132,18 +140,25 @@ lower_block(nir_block *block, void *state) { nir_foreach_instr_safe(block, instr) { if (instr->type == nir_instr_type_intrinsic) - lower_instr(nir_instr_as_intrinsic(instr), state); + lower_instr(nir_instr_as_intrinsic(instr), + (lower_atomic_state *) state); } return true; } void -nir_lower_atomics(nir_shader *shader) +nir_lower_atomics(nir_shader *shader, + const struct gl_shader_program *shader_program) { + lower_atomic_state state = { + .shader = shader, + .shader_program = shader_program, + }; + nir_foreach_overload(shader, overload) { if (overload->impl) { - nir_foreach_block(overload->impl, lower_block, overload->impl); + nir_foreach_block(overload->impl, lower_block, (void *) &state); nir_metadata_preserve(overload->impl, nir_metadata_block_index | nir_metadata_dominance); } diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py index f2d584fe484..3c0f1da94af 100644 --- a/src/glsl/nir/nir_opcodes.py +++ b/src/glsl/nir/nir_opcodes.py @@ -468,6 +468,51 @@ binop("fmax", tfloat, "", "fmaxf(src0, src1)") binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0") binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0") +# Saturated vector add for 4 8bit ints. +binop("usadd_4x8", tint, commutative + associative, """ +dst = 0; +for (int i = 0; i < 32; i += 8) { + dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i; +} +""") + +# Saturated vector subtract for 4 8bit ints. 
+binop("ussub_4x8", tint, "", """ +dst = 0; +for (int i = 0; i < 32; i += 8) { + int src0_chan = (src0 >> i) & 0xff; + int src1_chan = (src1 >> i) & 0xff; + if (src0_chan > src1_chan) + dst |= (src0_chan - src1_chan) << i; +} +""") + +# vector min for 4 8bit ints. +binop("umin_4x8", tint, commutative + associative, """ +dst = 0; +for (int i = 0; i < 32; i += 8) { + dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i; +} +""") + +# vector max for 4 8bit ints. +binop("umax_4x8", tint, commutative + associative, """ +dst = 0; +for (int i = 0; i < 32; i += 8) { + dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i; +} +""") + +# unorm multiply: (a * b) / 255. +binop("umul_unorm_4x8", tint, commutative + associative, """ +dst = 0; +for (int i = 0; i < 32; i += 8) { + int src0_chan = (src0 >> i) & 0xff; + int src1_chan = (src1 >> i) & 0xff; + dst |= ((src0_chan * src1_chan) / 255) << i; +} +""") + binop("fpow", tfloat, "", "powf(src0, src1)") binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat, diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py index cafbd6d66a5..30ede52b146 100644 --- a/src/glsl/nir/nir_opt_algebraic.py +++ b/src/glsl/nir/nir_opt_algebraic.py @@ -56,12 +56,16 @@ optimizations = [ (('iabs', ('ineg', a)), ('iabs', a)), (('fadd', a, 0.0), a), (('iadd', a, 0), a), + (('usadd_4x8', a, 0), a), + (('usadd_4x8', a, ~0), ~0), (('fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))), (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))), (('fadd', ('fneg', a), a), 0.0), (('iadd', ('ineg', a), a), 0), (('fmul', a, 0.0), 0.0), (('imul', a, 0), 0), + (('umul_unorm_4x8', a, 0), 0), + (('umul_unorm_4x8', a, ~0), a), (('fmul', a, 1.0), a), (('imul', a, 1), a), (('fmul', a, -1.0), ('fneg', a)), @@ -202,6 +206,8 @@ optimizations = [ # Subtracts (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)), (('isub', a, ('isub', 0, b)), ('iadd', a, b)), + (('ussub_4x8', a, 0), a), + (('ussub_4x8', a, ~0), 0), (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'), (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'), (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'), diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c index 09663996869..30220c5e48d 100644 --- a/src/glsl/nir/nir_print.c +++ b/src/glsl/nir/nir_print.c @@ -228,12 +228,13 @@ print_var_decl(nir_variable *var, print_state *state) const char *const cent = (var->data.centroid) ? "centroid " : ""; const char *const samp = (var->data.sample) ? "sample " : ""; + const char *const patch = (var->data.patch) ? "patch " : ""; const char *const inv = (var->data.invariant) ? 
"invariant " : ""; const char *const mode[] = { "shader_in ", "shader_out ", "", "", "uniform ", "shader_storage", "system " }; - fprintf(fp, "%s%s%s%s%s ", - cent, samp, inv, mode[var->data.mode], + fprintf(fp, "%s%s%s%s%s%s ", + cent, samp, patch, inv, mode[var->data.mode], glsl_interp_qualifier_name(var->data.interpolation)); glsl_print_type(var->type, fp); diff --git a/src/glsl/opt_dead_builtin_varyings.cpp b/src/glsl/opt_dead_builtin_varyings.cpp index 31719d20c05..68b70eedf92 100644 --- a/src/glsl/opt_dead_builtin_varyings.cpp +++ b/src/glsl/opt_dead_builtin_varyings.cpp @@ -269,14 +269,14 @@ public: */ class replace_varyings_visitor : public ir_rvalue_visitor { public: - replace_varyings_visitor(exec_list *ir, + replace_varyings_visitor(struct gl_shader *sha, const varying_info_visitor *info, unsigned external_texcoord_usage, unsigned external_color_usage, bool external_has_fog) - : info(info), new_fog(NULL) + : shader(sha), info(info), new_fog(NULL) { - void *const ctx = ir; + void *const ctx = shader->ir; memset(this->new_fragdata, 0, sizeof(this->new_fragdata)); memset(this->new_texcoord, 0, sizeof(this->new_texcoord)); @@ -293,14 +293,16 @@ public: * occurrences of gl_TexCoord will be replaced with. */ if (info->lower_texcoord_array) { - prepare_array(ir, this->new_texcoord, ARRAY_SIZE(this->new_texcoord), + prepare_array(shader->ir, this->new_texcoord, + ARRAY_SIZE(this->new_texcoord), VARYING_SLOT_TEX0, "TexCoord", mode_str, info->texcoord_usage, external_texcoord_usage); } /* Handle gl_FragData in the same way like gl_TexCoord. */ if (info->lower_fragdata_array) { - prepare_array(ir, this->new_fragdata, ARRAY_SIZE(this->new_fragdata), + prepare_array(shader->ir, this->new_fragdata, + ARRAY_SIZE(this->new_fragdata), FRAG_RESULT_DATA0, "FragData", mode_str, info->fragdata_usage, (1 << MAX_DRAW_BUFFERS) - 1); } @@ -340,7 +342,7 @@ public: } /* Now do the replacing. */ - visit_list_elements(this, ir); + visit_list_elements(this, shader->ir); } void prepare_array(exec_list *ir, @@ -389,6 +391,13 @@ public: /* Remove the gl_FragData array. */ if (this->info->lower_fragdata_array && var == this->info->fragdata_array) { + + /* Clone variable for program resource list before it is removed. */ + if (!shader->fragdata_arrays) + shader->fragdata_arrays = new (shader) exec_list; + + shader->fragdata_arrays->push_tail(var->clone(shader, NULL)); + var->remove(); } @@ -487,6 +496,7 @@ public: } private: + struct gl_shader *shader; const varying_info_visitor *info; ir_variable *new_fragdata[MAX_DRAW_BUFFERS]; ir_variable *new_texcoord[MAX_TEXTURE_COORD_UNITS]; @@ -498,20 +508,20 @@ private: } /* anonymous namespace */ static void -lower_texcoord_array(exec_list *ir, const varying_info_visitor *info) +lower_texcoord_array(struct gl_shader *shader, const varying_info_visitor *info) { - replace_varyings_visitor(ir, info, + replace_varyings_visitor(shader, info, (1 << MAX_TEXTURE_COORD_UNITS) - 1, 1 | 2, true); } static void -lower_fragdata_array(exec_list *ir) +lower_fragdata_array(struct gl_shader *shader) { varying_info_visitor info(ir_var_shader_out, true); - info.get(ir, 0, NULL); + info.get(shader->ir, 0, NULL); - replace_varyings_visitor(ir, &info, 0, 0, 0); + replace_varyings_visitor(shader, &info, 0, 0, 0); } @@ -523,7 +533,7 @@ do_dead_builtin_varyings(struct gl_context *ctx, { /* Lower the gl_FragData array to separate variables. 
*/ if (consumer && consumer->Stage == MESA_SHADER_FRAGMENT) { - lower_fragdata_array(consumer->ir); + lower_fragdata_array(consumer); } /* Lowering of built-in varyings has no effect with the core context and @@ -544,7 +554,7 @@ do_dead_builtin_varyings(struct gl_context *ctx, if (!consumer) { /* At least eliminate unused gl_TexCoord elements. */ if (producer_info.lower_texcoord_array) { - lower_texcoord_array(producer->ir, &producer_info); + lower_texcoord_array(producer, &producer_info); } return; } @@ -556,7 +566,7 @@ do_dead_builtin_varyings(struct gl_context *ctx, if (!producer) { /* At least eliminate unused gl_TexCoord elements. */ if (consumer_info.lower_texcoord_array) { - lower_texcoord_array(consumer->ir, &consumer_info); + lower_texcoord_array(consumer, &consumer_info); } return; } @@ -566,7 +576,7 @@ do_dead_builtin_varyings(struct gl_context *ctx, if (producer_info.lower_texcoord_array || producer_info.color_usage || producer_info.has_fog) { - replace_varyings_visitor(producer->ir, + replace_varyings_visitor(producer, &producer_info, consumer_info.texcoord_usage, consumer_info.color_usage, @@ -587,7 +597,7 @@ do_dead_builtin_varyings(struct gl_context *ctx, if (consumer_info.lower_texcoord_array || consumer_info.color_usage || consumer_info.has_fog) { - replace_varyings_visitor(consumer->ir, + replace_varyings_visitor(consumer, &consumer_info, producer_info.texcoord_usage, producer_info.color_usage, |